Skip to content

Commit

Permalink
Change parser to return RegexMethod
Browse files Browse the repository at this point in the history
Clean up how the data is structured.
  • Loading branch information
stephentoub committed Mar 10, 2022
1 parent 4258d1e commit 5fb55c8
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,23 @@ namespace System.Text.RegularExpressions.Generator
public partial class RegexGenerator
{
/// <summary>Emits the definition of the partial method. This method just delegates to the property cache on the generated Regex-derived type.</summary>
private static void EmitRegexPartialMethod(RegexType regexClass, IndentedTextWriter writer, string generatedClassName, int id)
private static void EmitRegexPartialMethod(RegexMethod regexMethod, IndentedTextWriter writer, string generatedClassName)
{
// Emit the namespace
if (!string.IsNullOrWhiteSpace(regexClass.Namespace))
// Emit the namespace.
RegexType? parent = regexMethod.DeclaringType;
if (!string.IsNullOrWhiteSpace(parent.Namespace))
{
writer.WriteLine($"namespace {regexClass.Namespace}");
writer.WriteLine($"namespace {parent.Namespace}");
writer.WriteLine("{");
writer.Indent++;
}

// Emit containing types
RegexType? parent = regexClass.ParentClass;
// Emit containing types.
var parentClasses = new Stack<string>();
while (parent is not null)
{
parentClasses.Push($"partial {parent.Keyword} {parent.Name}");
parent = parent.ParentClass;
parent = parent.Parent;
}
while (parentClasses.Count != 0)
{
Expand All @@ -50,12 +50,11 @@ private static void EmitRegexPartialMethod(RegexType regexClass, IndentedTextWri
writer.Indent++;
}

// Emit the direct parent type, including the partial method definition
writer.WriteLine($"partial {regexClass.Keyword} {regexClass.Name}");
writer.WriteLine("{");
writer.Indent++;
// Emit the partial method definition.
writer.WriteLine($"[global::System.CodeDom.Compiler.{s_generatedCodeAttribute}]");
writer.WriteLine($"{regexClass.Method.Modifiers} global::System.Text.RegularExpressions.Regex {regexClass.Method.MethodName}() => global::{GeneratedNamespace}.{generatedClassName}.{regexClass.Method.MethodName}_{id}.Instance;");
writer.WriteLine($"{regexMethod.Modifiers} global::System.Text.RegularExpressions.Regex {regexMethod.MethodName}() => global::{GeneratedNamespace}.{generatedClassName}.{regexMethod.GeneratedName}.Instance;");

// Unwind all scopes
while (writer.Indent != 0)
{
writer.Indent--;
Expand All @@ -69,7 +68,7 @@ private static void EmitRegexPartialMethod(RegexType regexClass, IndentedTextWri
{
writer.WriteLine($"/// <summary>Caches a <see cref=\"Regex\"/> instance for the {rm.MethodName} method.</summary>");
writer.WriteLine($"/// <remarks>A custom Regex-derived type could not be generated because {reason}.</remarks>");
writer.WriteLine($"internal sealed class {rm.MethodName}_{id} : Regex");
writer.WriteLine($"internal sealed class {rm.GeneratedName} : Regex");
writer.WriteLine($"{{");
writer.WriteLine($" /// <summary>Cached, thread-safe singleton instance.</summary>");
writer.WriteLine($" internal static Regex Instance {{ get; }} = new({Literal(rm.Pattern)}, {Literal(rm.Options)}, {GetTimeoutExpression(rm.MatchTimeout)});");
Expand All @@ -81,13 +80,13 @@ private static void EmitRegexPartialMethod(RegexType regexClass, IndentedTextWri
IndentedTextWriter writer, RegexMethod rm, int id, string runnerFactoryImplementation)
{
writer.WriteLine($"/// <summary>Custom <see cref=\"Regex\"/>-derived type for the {rm.MethodName} method.</summary>");
writer.WriteLine($"internal sealed class {rm.MethodName}_{id} : Regex");
writer.WriteLine($"internal sealed class {rm.GeneratedName} : Regex");
writer.WriteLine($"{{");
writer.WriteLine($" /// <summary>Cached, thread-safe singleton instance.</summary>");
writer.WriteLine($" internal static {rm.MethodName}_{id} Instance {{ get; }} = new();");
writer.WriteLine($" internal static {rm.GeneratedName} Instance {{ get; }} = new();");
writer.WriteLine($"");
writer.WriteLine($" /// <summary>Initializes the instance.</summary>");
writer.WriteLine($" private {rm.MethodName}_{id}()");
writer.WriteLine($" private {rm.GeneratedName}()");
writer.WriteLine($" {{");
writer.WriteLine($" base.pattern = {Literal(rm.Pattern)};");
writer.WriteLine($" base.roptions = {Literal(rm.Options)};");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,13 @@ private static bool IsSemanticTargetForGeneration(SemanticModel semanticModel, M
string? ns = regexMethodSymbol.ContainingType?.ContainingNamespace?.ToDisplayString(
SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted));

var regexType = new RegexType(
typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText,
ns ?? string.Empty,
$"{typeDec.Identifier}{typeDec.TypeParameterList}");

var regexMethod = new RegexMethod(
regexType,
methodSyntax,
regexMethodSymbol.Name,
methodSyntax.Modifiers.ToString(),
Expand All @@ -201,28 +207,21 @@ private static bool IsSemanticTargetForGeneration(SemanticModel semanticModel, M
matchTimeout ?? Timeout.Infinite,
regexTree);

var regexType = new RegexType(
regexMethod,
typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText,
ns ?? string.Empty,
$"{typeDec.Identifier}{typeDec.TypeParameterList}");

RegexType current = regexType;
var parent = typeDec.Parent as TypeDeclarationSyntax;

while (parent is not null && IsAllowedKind(parent.Kind()))
{
current.ParentClass = new RegexType(
null,
current.Parent = new RegexType(
parent is RecordDeclarationSyntax rds2 ? $"{parent.Keyword.ValueText} {rds2.ClassOrStructKeyword}" : parent.Keyword.ValueText,
ns ?? string.Empty,
$"{parent.Identifier}{parent.TypeParameterList}");

current = current.ParentClass;
current = current.Parent;
parent = parent.Parent as TypeDeclarationSyntax;
}

return regexType;
return regexMethod;

static bool IsAllowedKind(SyntaxKind kind) =>
kind == SyntaxKind.ClassDeclaration ||
Expand All @@ -233,13 +232,16 @@ private static bool IsSemanticTargetForGeneration(SemanticModel semanticModel, M
}

/// <summary>A regex method.</summary>
internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexTree Tree);
internal sealed record RegexMethod(RegexType DeclaringType, MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexTree Tree)
{
/// <summary>Numeric ID appended to <see cref="MethodName"/> to form <see cref="GeneratedName"/>.</summary>
/// <remarks>Settable because the ID is assigned after the record has been constructed, when the generator enumerates the methods to emit.</remarks>
public int GeneratedId { get; set; }
/// <summary>Name of the generated Regex-derived type for this method, in the form <c>{MethodName}_{GeneratedId}</c>.</summary>
public string GeneratedName => $"{MethodName}_{GeneratedId}";
}

/// <summary>A type holding a regex method.</summary>
internal sealed record RegexType(RegexMethod? Method, string Keyword, string Namespace, string Name)
internal sealed record RegexType(string Keyword, string Namespace, string Name)
{
public RegexType? ParentClass { get; set; }
public int GeneratedId { get; set; }
public RegexType? Parent { get; set; }
}
}
}
59 changes: 28 additions & 31 deletions src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
{
// Produces one entry per generated regex. This may be:
// - Diagnostic in the case of a failure that should end the compilation
// - (RegexType regexType, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers) in the case of valid regex
// - (RegexType regexType, string reason, Diagnostic diagnostic) in the case of a limited-support regex
// - (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers) in the case of valid regex
// - (RegexMethod regexMethod, string reason, Diagnostic diagnostic) in the case of a limited-support regex
IncrementalValueProvider<ImmutableArray<object?>> codeOrDiagnostics =
context.SyntaxProvider

Expand All @@ -57,17 +57,17 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
// Generate the RunnerFactory for each regex, if possible. This is where the bulk of the implementation occurs.
.Select((state, _) =>
{
if (state is not RegexType regexType)
if (state is not RegexMethod regexMethod)
{
Debug.Assert(state is Diagnostic);
return state;
}
// If we're unable to generate a full implementation for this regex, report a diagnostic.
// We'll still output a limited implementation that just caches a new Regex(...).
if (!regexType.Method.Tree.Root.SupportsCompilation(out string? reason))
if (!regexMethod.Tree.Root.SupportsCompilation(out string? reason))
{
return (regexType, reason, Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, regexType.Method.MethodSyntax.GetLocation()));
return (regexMethod, reason, Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, regexMethod.MethodSyntax.GetLocation()));
}
// Generate the core logic for the regex.
Expand All @@ -76,9 +76,9 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
var writer = new IndentedTextWriter(sw);
writer.Indent += 3;
writer.WriteLine();
EmitRegexDerivedTypeRunnerFactory(writer, regexType.Method, requiredHelpers);
EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers);
writer.Indent -= 3;
return (regexType, sw.ToString(), requiredHelpers);
return (regexMethod, sw.ToString(), requiredHelpers);
})
.Collect();

Expand Down Expand Up @@ -128,37 +128,28 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
// For every generated type, we give it an incrementally increasing ID, in order to create
// unique type names even in situations where method names were the same, while also keeping
// the type names short. Note that this is why we only generate the RunnerFactory implementations
// earlier in the pipeline... we wait to avoid generating code that relies on the class names
// earlier in the pipeline... we want to avoid generating code that relies on the class names
// until we're able to iterate through them linearly keeping track of a deterministic ID
// used to name them. The boilerplate code generation that happens here is minimal when compared to
// the work required to generate the actual matching code for the regex.
int id = 0;
string generatedClassName = $"__{ComputeStringHash(compilationDataAndResults.Right.AssemblyName ?? ""):x}";
// If we have any (RegexType regexType, string generatedName, string reason, Diagnostic diagnostic), these are regexes for which we have
// limited support and need to simply output boilerplate. For now assign an ID, emit the diagnostic, and emit the partial method.
// If we have any (RegexMethod regexMethod, string reason, Diagnostic diagnostic), these are regexes for which we have
// limited support and need to simply output boilerplate. We need to emit their diagnostics.
// If we have any (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers),
// those are generated implementations to be emitted. We need to gather up their required helpers.
Dictionary<string, string[]> requiredHelpers = new();
foreach (object? result in results)
{
if (result is ValueTuple<RegexType, string, Diagnostic> limitedSupportResult)
RegexMethod? regexMethod = null;
if (result is ValueTuple<RegexMethod, string, Diagnostic> limitedSupportResult)
{
context.ReportDiagnostic(limitedSupportResult.Item3);
limitedSupportResult.Item1.GeneratedId = id++;
EmitRegexPartialMethod(limitedSupportResult.Item1, writer, generatedClassName, limitedSupportResult.Item1.GeneratedId);
writer.WriteLine();
regexMethod = limitedSupportResult.Item1;
}
}
// If we have any (RegexType regexType, string generatedName, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers),
// those are generated implementations to be emitted. For now, assign an ID, and emit the partial method. We also gather up all of the helpers
// these methods will need emitted.
Dictionary<string, string[]> requiredHelpers = new();
foreach (object? result in results)
{
if (result is ValueTuple<RegexType, string, Dictionary<string, string[]>> regexImpl)
else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
{
regexImpl.Item1.GeneratedId = id++;
foreach (KeyValuePair<string, string[]> helper in regexImpl.Item3)
{
if (!requiredHelpers.ContainsKey(helper.Key))
Expand All @@ -167,7 +158,13 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
}
}
EmitRegexPartialMethod(regexImpl.Item1, writer, generatedClassName, regexImpl.Item1.GeneratedId);
regexMethod = regexImpl.Item1;
}
if (regexMethod is not null)
{
regexMethod.GeneratedId = id++;
EmitRegexPartialMethod(regexMethod, writer, generatedClassName);
writer.WriteLine();
}
}
Expand Down Expand Up @@ -207,14 +204,14 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
writer.Indent += 2;
foreach (object? result in results)
{
if (result is ValueTuple<RegexType, string, Diagnostic> limitedSupportResult)
if (result is ValueTuple<RegexMethod, string, Diagnostic> limitedSupportResult)
{
EmitRegexLimitedBoilerplate(writer, limitedSupportResult.Item1.Method, limitedSupportResult.Item1.GeneratedId, limitedSupportResult.Item2);
EmitRegexLimitedBoilerplate(writer, limitedSupportResult.Item1, limitedSupportResult.Item1.GeneratedId, limitedSupportResult.Item2);
writer.WriteLine();
}
else if (result is ValueTuple<RegexType, string, Dictionary<string, string[]>> regexImpl)
else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
{
EmitRegexDerivedImplementation(writer, regexImpl.Item1.Method, regexImpl.Item1.GeneratedId, regexImpl.Item2);
EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item1.GeneratedId, regexImpl.Item2);
writer.WriteLine();
}
}
Expand Down

0 comments on commit 5fb55c8

Please sign in to comment.