Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ private static string EmitRegexType(RegexType regexClass)
}

// Emit containing types
RegexType parent = regexClass.ParentClass;
RegexType? parent = regexClass.ParentClass;
var parentClasses = new Stack<string>();
while (parent != null)
while (parent is not null)
{
parentClasses.Push($"partial {parent.Keyword} {parent.Name} {parent.Constraints}");
parent = parent.ParentClass;
Expand All @@ -75,6 +75,7 @@ private static string EmitRegexType(RegexType regexClass)
// Generate a name to describe the regex instance. This includes the method name
// the user provided and a non-randomized (for determinism) hash of it to try to make
// the name that much harder to predict.
Debug.Assert(regexClass.Method is not null);
string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_";
generatedName += ComputeStringHash(generatedName).ToString("X");

Expand Down Expand Up @@ -104,31 +105,18 @@ static uint ComputeStringHash(string s)
}

/// <summary>Gets whether a given regular expression method is supported by the code generator.</summary>
private static bool SupportsCustomCodeGeneration(RegexMethod rm)
{
// Allow-list of the option flags for which this method reports custom code generation as supported.
const RegexOptions SupportedOptions =
RegexOptions.IgnoreCase |
RegexOptions.Multiline |
RegexOptions.ExplicitCapture |
RegexOptions.Compiled |
RegexOptions.Singleline |
RegexOptions.IgnorePatternWhitespace |
RegexOptions.RightToLeft |
RegexOptions.ECMAScript |
RegexOptions.CultureInvariant;

// If we see an option we're not aware of (but that was allowed through), don't emit custom regex code.
// Masking with the complement of the allow-list leaves only the unlisted bits; the result is true
// only when none of those bits are set.
// NOTE(review): the (int) cast suggests rm.Options is an integer-typed value here — confirm against RegexMethod.
return (rm.Options & ~(int)SupportedOptions) == 0;
}
private static bool SupportsCustomCodeGeneration(RegexMethod rm)
{
    // The generator doesn't currently know how to emit code for NonBacktracking,
    // so only methods without that flag qualify. No other option bit is examined here.
    bool requestsNonBacktracking = (rm.Options & RegexOptions.NonBacktracking) != 0;
    return !requestsNonBacktracking;
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about invalid options? Before we would return false, but now we would return true.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're weeded out in the parser here:

// Validate the options
const RegexOptions SupportedOptions =
RegexOptions.IgnoreCase |
RegexOptions.Multiline |
RegexOptions.ExplicitCapture |
RegexOptions.Compiled |
RegexOptions.Singleline |
RegexOptions.IgnorePatternWhitespace |
RegexOptions.RightToLeft |
#if DEBUG
RegexOptions.Debug |
#endif
RegexOptions.ECMAScript |
RegexOptions.CultureInvariant |
RegexOptions.NonBacktracking;
if ((regexOptions & ~SupportedOptions) != 0)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "options");
}


/// <summary>Generates the code for a regular expression method.</summary>
private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id)
{
string patternExpression = Literal(rm.Pattern);
string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({rm.Options})";
string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({(int)rm.Options})";
string timeoutExpression = rm.MatchTimeout == Timeout.Infinite ?
"global::System.Threading.Timeout.InfiniteTimeSpan" :
$"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.Value.ToString(CultureInfo.InvariantCulture)})";
$"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.ToString(CultureInfo.InvariantCulture)})";

writer.WriteLine(s_generatedCodeAttribute);
writer.WriteLine($"{rm.Modifiers} global::System.Text.RegularExpressions.Regex {rm.MethodName}() => {id}.Instance;");
Expand Down Expand Up @@ -242,8 +230,8 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id)
{
RegexOptions options = (RegexOptions)rm.Options;
var code = rm.Code;
var lcc = code.LeadingCharClasses;
RegexCode code = rm.Code;
(string CharClass, bool CaseInsensitive)[]? lcc = code.LeadingCharClasses;
bool rtl = code.RightToLeft;
bool hasTextInfo = false;
bool textInfoEmitted = false;
Expand Down Expand Up @@ -523,7 +511,7 @@ void EmitAnchorAndLeadingChecks()
writer.WriteLine("return true;");
}
}
else if (code.LeadingCharClasses is null)
else if (lcc is null)
{
writer.WriteLine("return true;");
}
Expand Down Expand Up @@ -680,7 +668,11 @@ void EmitAnchorAndLeadingChecks()
private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id)
{
Debug.Assert(rm.Code.Tree.Root.Type == RegexNode.Capture);
if (RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(rm.Code.Tree.Root.Child(0), RegexNode.DefaultMaxRecursionDepth) &&
if ((rm.Options & RegexOptions.NonBacktracking) != 0)
{
EmitNonBacktrackingGo(writer, rm, id);
}
else if (RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(rm.Code.Tree.Root.Child(0), RegexNode.DefaultMaxRecursionDepth) &&
(((RegexOptions)rm.Code.Tree.Root.Options) & RegexOptions.RightToLeft) == 0)
{
EmitSimplifiedGo(writer, rm, id);
Expand All @@ -691,6 +683,12 @@ private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id)
}
}

/// <summary>Emits the body of a Go method supporting RegexOptions.NonBacktracking.</summary>
private static void EmitNonBacktrackingGo(IndentedTextWriter writer, RegexMethod rm, string id)
{
// Placeholder: the body is intentionally empty, so nothing is written to 'writer' yet.
// TODO: Implement this and remove SupportsCustomCodeGeneration.
}
Comment thread
stephentoub marked this conversation as resolved.

/// <summary>Emits the body of a simplified Go implementation that's possible when there's minimal backtracking required by the expression.</summary>
private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id)
{
Expand Down Expand Up @@ -888,7 +886,7 @@ void EmitSwitchedBranches()
Debug.Assert(child.Type is RegexNode.One or RegexNode.Multi or RegexNode.Concatenate, child.Description());
Debug.Assert(child.Type is not RegexNode.Concatenate || (child.ChildCount() >= 2 && child.Child(0).Type is RegexNode.One or RegexNode.Multi));

RegexNode childStart = child.FindBranchOneOrMultiStart();
RegexNode? childStart = child.FindBranchOneOrMultiStart();
Debug.Assert(childStart is not null, child.Description());

writer.WriteLine($"case {Literal(childStart.FirstCharOfOneOrMulti())}:");
Expand Down Expand Up @@ -1248,7 +1246,7 @@ void EmitUpdateBumpalong()
}

// Emits the code to handle a single-character match.
void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string offset = null)
void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset = null)
{
string expr = $"{textSpanLocal}[{Sum(textSpanPos, offset)}]";
switch (node.Type)
Expand Down Expand Up @@ -1843,7 +1841,7 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st
const string Backtrack = "Backtrack"; // label for backtracking

int[] codes = rm.Code.Codes;
RegexOptions options = (RegexOptions)rm.Options.Value;
RegexOptions options = rm.Options;

int labelCounter = 0;
string DefineLabel(string prefix = "L") => $"{prefix}{labelCounter++}";
Expand Down Expand Up @@ -1919,6 +1917,7 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st
{
using (EmitBlock(writer, $"case {i}:"))
{
Debug.Assert(notes is not null);
BacktrackNote n = notes[i];
if (n.flags != 0)
{
Expand Down Expand Up @@ -2879,7 +2878,7 @@ void Goto(int i)
/// </summary>
void Trackagain() => PushTrack(currentBacktrackNote);

void PushTrack<T>(T expr) =>
    // Emits "<ReadyPushTrack()> = <expr>;". A single declaration pattern replaces the former
    // type test plus cast; IFormattable values are rendered with the invariant culture.
    writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable formattable ? formattable.ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");
void PushTrack<T>(T expr) where T : notnull =>
    // Emits "<ReadyPushTrack()> = <expr>;". A single declaration pattern replaces the former
    // type test plus cast; IFormattable values are rendered with the invariant culture.
    writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable formattable ? formattable.ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");

/// <summary>Retrieves the top entry on the tracking stack without popping.</summary>
string TopTrack() => "runtrack[runtrackpos]";
Expand All @@ -2896,7 +2895,7 @@ void Goto(int i)
int Code() => currentOpcode & RegexCode.Mask;

/// <summary>Saves the value of a local variable on the grouping stack.</summary>
void PushStack<T>(T expr) =>
    // Emits "<ReadyPushStack()> = <expr>;". A single declaration pattern replaces the former
    // type test plus cast; IFormattable values are rendered with the invariant culture.
    writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable formattable ? formattable.ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");
void PushStack<T>(T expr) where T : notnull =>
    // Emits "<ReadyPushStack()> = <expr>;". A single declaration pattern replaces the former
    // type test plus cast; IFormattable values are rendered with the invariant culture.
    writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable formattable ? formattable.ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");

string ReadyPushStack() => "runstack[--runstackpos]";

Expand Down Expand Up @@ -2924,7 +2923,7 @@ int AddUniqueTrack(int i, int flags = RegexCode.Back)
int NextCodepos() => currentCodePos + RegexCode.OpcodeSize(codes[currentCodePos]);

/// <summary>The label for the next (forward) operation.</summary>
string AdvanceLabel() => labels![NextCodepos()];
string AdvanceLabel() => labels[NextCodepos()]!;

/// <summary>Goto the next (forward) operation.</summary>
void Advance() => writer.WriteLine($"goto {AdvanceLabel()};");
Expand Down Expand Up @@ -2971,7 +2970,7 @@ int AddGoto(int destpos)
{
if (forwardJumpsThroughSwitch[destpos] == -1)
{
forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels![destpos], destpos);
forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels[destpos]!, destpos);
}

return forwardJumpsThroughSwitch[destpos];
Expand All @@ -2998,7 +2997,7 @@ private record BacktrackNote(int flags, string label, int codepos);

private static bool EmitLoopTimeoutCounterIfNeeded(IndentedTextWriter writer, RegexMethod rm)
{
if (rm.MatchTimeout.HasValue && rm.MatchTimeout.Value != Timeout.Infinite)
if (rm.MatchTimeout != Timeout.Infinite)
{
writer.WriteLine("int loopTimeoutCounter = 0;");
return true;
Expand Down
Loading