/
LoopDetection.cs
710 lines (671 loc) · 31.8 KB
/
LoopDetection.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
// Copyright (c) 2014 Daniel Grunwald
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using ICSharpCode.Decompiler.FlowAnalysis;
using ICSharpCode.Decompiler.IL.Transforms;
using ICSharpCode.Decompiler.Util;
namespace ICSharpCode.Decompiler.IL.ControlFlow
{
/// <summary>
/// Detect loops in IL AST.
/// </summary>
/// <remarks>
/// Transform ordering:
/// * LoopDetection should run before other control flow structures are detected.
/// * Blocks should be basic blocks (not extended basic blocks) so that the natural loops
/// don't include more instructions than strictly necessary.
/// * Loop detection should run after the 'return block' is duplicated (ControlFlowSimplification).
/// </remarks>
public class LoopDetection : IBlockTransform
{
BlockTransformContext context;
/// <summary>Block container corresponding to the current cfg.</summary>
BlockContainer currentBlockContainer;
/// <summary>
/// Check whether 'block' is a loop head; and construct a loop instruction
/// (nested BlockContainer) if it is.
/// </summary>
public void Run(Block block, BlockTransformContext context)
{
this.context = context;
// LoopDetection runs early enough so that block should still
// be in the original container at this point.
Debug.Assert(block.Parent == context.ControlFlowGraph.Container);
this.currentBlockContainer = context.ControlFlowGraph.Container;
// Because this is a post-order block transform, we can assume that
// any nested loops within this loop have already been constructed.
if (block.Instructions.Last() is SwitchInstruction switchInst) {
// Switch instructions support "break;" just like loops
DetectSwitchBody(block, switchInst);
}
ControlFlowNode h = context.ControlFlowNode; // CFG node for our potential loop head
Debug.Assert(h.UserData == block);
Debug.Assert(!TreeTraversal.PreOrder(h, n => n.DominatorTreeChildren).Any(n => n.Visited));
List<ControlFlowNode> loop = null;
foreach (var t in h.Predecessors) {
if (h.Dominates(t)) {
// h->t is a back edge, and h is a loop header
// Add the natural loop of t->h to the loop.
// Definitions:
// * A back edge is an edge t->h so that h dominates t.
// * The natural loop of the back edge is the smallest set of nodes
// that includes the back edge and has no predecessors outside the set
// except for the predecessor of the header.
if (loop == null) {
loop = new List<ControlFlowNode>();
loop.Add(h);
// Mark loop header as visited so that the pre-order traversal
// stops at the loop header.
h.Visited = true;
}
t.TraversePreOrder(n => n.Predecessors, loop.Add);
}
}
if (loop != null) {
var headBlock = (Block)h.UserData;
context.Step($"Construct loop with head {headBlock.Label}", headBlock);
// loop now is the union of all natural loops with loop head h.
// Ensure any block included into nested loops is also considered part of this loop:
IncludeNestedContainers(loop);
// Try to extend the loop to reduce the number of exit points:
ExtendLoop(h, loop, out var exitPoint);
IncludeUnreachablePredecessors(loop);
// Sort blocks in the loop in reverse post-order to make the output look a bit nicer.
// (if the loop doesn't contain nested loops, this is a topological sort)
loop.Sort((a, b) => b.PostOrderNumber.CompareTo(a.PostOrderNumber));
Debug.Assert(loop[0] == h);
foreach (var node in loop) {
node.Visited = false; // reset visited flag so that we can find outer loops
Debug.Assert(h.Dominates(node) || !node.IsReachable, "The loop body must be dominated by the loop head");
}
ConstructLoop(loop, exitPoint);
}
}
/// <summary>
/// For each block in the input loop that is the head of a nested loop or switch,
/// include all blocks from the nested container into the loop.
///
/// This ensures that all blocks that were included into inner loops are also
/// included into the outer loop, thus keeping our loops well-nested.
/// </summary>
/// <remarks>
/// More details for why this is necessary are here:
/// https://github.com/icsharpcode/ILSpy/issues/915
///
/// Pre+Post-Condition: node.Visited iff loop.Contains(node)
/// </remarks>
void IncludeNestedContainers(List<ControlFlowNode> loop)
{
for (int i = 0; i < loop.Count; i++) {
IncludeBlock((Block)loop[i].UserData);
}
void IncludeBlock(Block block)
{
if (block.Instructions[0] is BlockContainer nestedContainer) {
// Just in case the block has multiple nested containers (e.g. due to loop and switch),
// also check the entry point:
IncludeBlock(nestedContainer.EntryPoint);
// Use normal processing for all non-entry-point blocks
// (the entry-point itself doesn't have a CFG node, because it's newly created by this transform)
for (int i = 1; i < nestedContainer.Blocks.Count; i++) {
var node = context.ControlFlowGraph.GetNode(nestedContainer.Blocks[i]);
Debug.Assert(loop[0].Dominates(node) || !node.IsReachable);
if (!node.Visited) {
node.Visited = true;
loop.Add(node);
// note: this block will be re-visited when the "i < loop.Count"
// gets around to the new entry
}
}
}
}
}
#region ExtendLoop
/// <summary>
/// Given a natural loop, add additional CFG nodes to the loop in order
/// to reduce the number of exit points out of the loop.
/// We do this because C# only allows reaching a single exit point (with 'break'
/// statements or when the loop condition evaluates to false), so we'd have
/// to introduce 'goto' statements for any additional exit points.
/// </summary>
/// <remarks>
/// Definition:
/// A "reachable exit" is a branch/leave target that is reachable from the loop,
/// but not dominated by the loop head. A reachable exit may or may not have a
/// corresponding CFG node (depending on whether it is a block in the current block container).
/// -> reachable exits are leaving the code region dominated by the loop
///
/// Definition:
/// A loop "exit point" is a CFG node that is not itself part of the loop,
/// but has at least one predecessor which is part of the loop.
/// -> exit points are leaving the loop itself
///
/// Nodes can only be added to the loop if they are dominated by the loop head.
/// When adding a node to the loop, we must also add all of that node's predecessors
/// to the loop. (this ensures that the loop keeps its single entry point)
///
/// Goal: If possible, find a set of nodes that can be added to the loop so that there
/// remains only a single exit point.
/// Add as little code as possible to the loop to reach this goal.
///
/// This means we need to partition the set of nodes dominated by the loop entry point
/// into two sets (in-loop and out-of-loop).
/// Constraints:
/// * the loop head itself is in-loop
/// * there must not be any edge from an out-of-loop node to an in-loop node
/// -> all predecessors of in-loop nodes are also in-loop
/// -> all nodes in a cycle are part of the same partition
/// Optimize:
/// * use only a single exit point if at all possible
/// * minimize the amount of code in the in-loop partition
/// (thus: maximize the amount of code in the out-of-loop partition)
/// "amount of code" could be measured as:
/// * number of basic blocks
/// * number of instructions directly in those basic blocks (~= number of statements)
/// * number of instructions in those basic blocks (~= number of expressions)
/// (we currently use the number of statements)
///
/// Observations:
/// * If a node is in-loop, so are all its ancestors in the dominator tree (up to the loop entry point)
/// * If there are no exits reachable from a node (i.e. all paths from that node lead to a return/throw instruction),
/// it is valid to put the group of nodes dominated by that node into either partition independently of
/// any other nodes except for the ancestors in the dominator tree.
/// (exception: the loop head itself must always be in-loop)
///
/// There are two different cases we need to consider:
/// 1) There are no exits reachable at all from the loop head.
/// -> it is possible to create a loop with zero exit points by adding all nodes
/// dominated by the loop to the loop.
/// -> the only way to exit the loop is by "return;" or "throw;"
/// 2) There are some exits reachable from the loop head.
///
/// In case 1, we can pick a single exit point freely by picking any node that has no reachable exits
/// (other than the loop head).
/// All nodes dominated by the exit point are out-of-loop, all other nodes are in-loop.
/// See PickExitPoint() for the heuristic that picks the exit point in this case.
///
/// In case 2, we need to pick our exit point so that all paths from the loop head
/// to the reachable exits run through that exit point.
///
/// This is a form of postdominance where the reachable exits are considered exit nodes,
/// while "return;" or "throw;" instructions are not considered exit nodes.
///
/// Using this form of postdominance, we are looking for an exit point that post-dominates all nodes in the natural loop.
/// --> a common ancestor in post-dominator tree.
/// To minimize the amount of code in-loop, we pick the lowest common ancestor.
/// All nodes dominated by the exit point are out-of-loop, all other nodes are in-loop.
/// (using normal dominance as in case 1, not post-dominance!)
///
/// If it is impossible to use a single exit point for the loop, the lowest common ancestor will be the fake "exit node"
/// used by the post-dominance analysis. In this case, we fall back to the old heuristic algorithm.
///
/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
/// </remarks>
void ExtendLoop(ControlFlowNode loopHead, List<ControlFlowNode> loop, out ControlFlowNode exitPoint, bool isSwitch=false)
{
exitPoint = FindExitPoint(loopHead, loop, isSwitch);
Debug.Assert(!loop.Contains(exitPoint), "Cannot pick an exit point that is part of the natural loop");
if (exitPoint != null) {
// Either we are in case 1 and just picked an exit that maximizes the amount of code
// outside the loop, or we are in case 2 and found an exit point via post-dominance.
// Note that if exitPoint == NoExitPoint, we end up adding all dominated blocks to the loop.
var ep = exitPoint;
foreach (var node in TreeTraversal.PreOrder(loopHead, n => (n != ep) ? n.DominatorTreeChildren : null)) {
if (node != exitPoint && !node.Visited) {
node.Visited = true;
loop.Add(node);
}
}
} else {
// We are in case 2, but could not find a suitable exit point.
// Heuristically try to minimize the number of exit points
// (but we'll always end up with more than 1 exit and will require goto statements).
ExtendLoopHeuristic(loopHead, loop, loopHead);
}
}
/// <summary>
/// Special control flow node (not part of any graph) that signifies that we want to construct a loop
/// without any exit point.
/// </summary>
static readonly ControlFlowNode NoExitPoint = new ControlFlowNode();
/// <summary>
/// Finds a suitable single exit point for the specified loop.
/// </summary>
/// <returns>
/// 1) If a suitable exit point was found: the control flow block that should be reached when breaking from the loop
/// 2) If the loop should not have any exit point (extend by all dominated blocks): NoExitPoint
/// 3) otherwise (exit point unknown, heuristically extend loop): null
/// </returns>
/// <remarks>This method must not write to the Visited flags on the CFG.</remarks>
ControlFlowNode FindExitPoint(ControlFlowNode loopHead, IReadOnlyList<ControlFlowNode> naturalLoop, bool treatBackEdgesAsExits)
{
bool hasReachableExit = context.ControlFlowGraph.HasReachableExit(loopHead);
if (!hasReachableExit && treatBackEdgesAsExits) {
// If we're analyzing the switch, there's no reachable exit, but the loopHead (=switchHead) block
// is also a loop head, we consider the back-edge a reachable exit for the switch.
hasReachableExit = loopHead.Predecessors.Any(p => loopHead.Dominates(p));
}
if (!hasReachableExit) {
// Case 1:
// There are no nodes n so that loopHead dominates a predecessor of n but not n itself
// -> we could build a loop with zero exit points.
if (IsPossibleForeachLoop((Block)loopHead.UserData, out var exitBranch)) {
if (exitBranch != null) {
// let's see if the target of the exit branch is a suitable exit point
var cfgNode = loopHead.Successors.FirstOrDefault(n => n.UserData == exitBranch.TargetBlock);
if (cfgNode != null && loopHead.Dominates(cfgNode) && !context.ControlFlowGraph.HasReachableExit(cfgNode)) {
return cfgNode;
}
}
return NoExitPoint;
}
ControlFlowNode exitPoint = null;
int exitPointILOffset = -1;
foreach (var node in loopHead.DominatorTreeChildren) {
PickExitPoint(node, ref exitPoint, ref exitPointILOffset);
}
return exitPoint;
} else {
// Case 2:
// We need to pick our exit point so that all paths from the loop head
// to the reachable exits run through that exit point.
var cfg = context.ControlFlowGraph.cfg;
var revCfg = PrepareReverseCFG(loopHead, treatBackEdgesAsExits, out int exitNodeArity);
//ControlFlowNode.ExportGraph(cfg).Show("cfg");
//ControlFlowNode.ExportGraph(revCfg).Show("rev");
ControlFlowNode commonAncestor = revCfg[loopHead.UserIndex];
Debug.Assert(commonAncestor.IsReachable);
foreach (ControlFlowNode cfgNode in naturalLoop) {
ControlFlowNode revNode = revCfg[cfgNode.UserIndex];
if (revNode.IsReachable) {
commonAncestor = Dominance.FindCommonDominator(commonAncestor, revNode);
}
}
// All paths from within the loop to a reachable exit run through 'commonAncestor'.
// However, this doesn't mean that 'commonAncestor' is valid as an exit point.
// We walk up the post-dominator tree until we've got a valid exit point:
ControlFlowNode exitPoint;
while (commonAncestor.UserIndex >= 0) {
exitPoint = cfg[commonAncestor.UserIndex];
Debug.Assert(exitPoint.Visited == naturalLoop.Contains(exitPoint));
// It's possible that 'commonAncestor' is itself part of the natural loop.
// If so, it's not a valid exit point.
if (!exitPoint.Visited && ValidateExitPoint(loopHead, exitPoint)) {
// we found an exit point
return exitPoint;
}
commonAncestor = commonAncestor.ImmediateDominator;
}
// least common post-dominator is the artificial exit node
// This means we're in one of two cases:
// * The loop might have multiple exit points.
// -> we should return null
// * The loop has a single exit point that wasn't considered during post-dominance analysis.
// (which means the single exit isn't dominated by the loop head)
// -> we should return NoExitPoint so that all code dominated by the loop head is included into the loop
if (exitNodeArity > 1) {
return null;
} else {
// If exitNodeArity == 0, we should maybe look test if our exits out of the block container are all compatible?
// but I don't think it hurts to have a bit too much code inside the loop in this rare case.
return NoExitPoint;
}
}
}
/// <summary>
/// Validates an exit point.
///
/// An exit point is invalid iff there is a node reachable from the exit point that
/// is dominated by the loop head, but not by the exit point.
/// (i.e. this method returns false iff the exit point's dominance frontier contains
/// a node dominated by the loop head. but we implement this the slow way because
/// we don't have dominance frontiers precomputed)
/// </summary>
/// <remarks>
/// We need this because it's possible that there's a return block (thus reverse-unreachable node ignored by post-dominance)
/// that is reachable both directly from the loop, and from the exit point.
/// </remarks>
bool ValidateExitPoint(ControlFlowNode loopHead, ControlFlowNode exitPoint)
{
var cfg = context.ControlFlowGraph;
return IsValid(exitPoint);
bool IsValid(ControlFlowNode node)
{
if (!cfg.HasReachableExit(node)) {
// Optimization: if the dominance frontier is empty, we don't need
// to check every node.
return true;
}
foreach (var succ in node.Successors) {
if (loopHead != succ && loopHead.Dominates(succ) && !exitPoint.Dominates(succ))
return false;
}
foreach (var child in node.DominatorTreeChildren) {
if (!IsValid(child))
return false;
}
return true;
}
}
/// <summary>
/// Pick exit point by picking any node that has no reachable exits.
///
/// In the common case where the code was compiled with a compiler that emits IL code
/// in source order (like the C# compiler), we can find the "real" exit point
/// by simply picking the block with the highest IL offset.
/// So let's do that instead of maximizing amount of code.
/// </summary>
/// <returns>Code amount in <paramref name="node"/> and its dominated nodes.</returns>
/// <remarks>This method must not write to the Visited flags on the CFG.</remarks>
void PickExitPoint(ControlFlowNode node, ref ControlFlowNode exitPoint, ref int exitPointILOffset)
{
Block block = (Block)node.UserData;
if (block.ILRange.Start > exitPointILOffset
&& !context.ControlFlowGraph.HasReachableExit(node)
&& ((Block)node.UserData).Parent == currentBlockContainer)
{
// HasReachableExit(node) == false
// -> there are no nodes n so that `node` dominates a predecessor of n but not n itself
// -> there is no control flow out of `node` back into the loop, so it's usable as exit point
// Additionally, we require that the block wasn't already moved into a nested loop,
// since there's no way to jump into the middle of that loop when we need to exit.
// NB: this is the only reason why we detect nested loops before outer loops:
// If we detected the outer loop first, the outer loop might pick an exit point
// that prevents us from finding a nice exit for the inner loops, causing
// unnecessary gotos.
exitPoint = node;
exitPointILOffset = block.ILRange.Start;
return; // don't visit children, they are likely to have even later IL offsets and we'd end up
// moving almost all of the code into the loop.
}
foreach (var child in node.DominatorTreeChildren) {
PickExitPoint(child, ref exitPoint, ref exitPointILOffset);
}
}
/// <summary>
/// Constructs a new control flow graph.
/// Each node cfg[i] has a corresponding node rev[i].
/// Edges are only created for nodes dominated by loopHead, and are in reverse from their direction
/// in the primary CFG.
/// An artificial exit node is used for edges that leave the set of nodes dominated by loopHead,
/// or that leave the block Container.
/// </summary>
/// <param name="loopHead">Entry point of the loop.</param>
/// <param name="treatBackEdgesAsExits">Whether to treat loop back edges as exit points.</param>
/// <param name="exitNodeArity">out: The number of different CFG nodes.
/// Possible values:
/// 0 = no CFG nodes used as exit nodes (although edges leaving the block container might still be exits);
/// 1 = a single CFG node (not dominated by loopHead) was used as an exit node;
/// 2 = more than one CFG node (not dominated by loopHead) was used as an exit node.
/// </param>
/// <returns></returns>
ControlFlowNode[] PrepareReverseCFG(ControlFlowNode loopHead, bool treatBackEdgesAsExits, out int exitNodeArity)
{
ControlFlowNode[] cfg = context.ControlFlowGraph.cfg;
ControlFlowNode[] rev = new ControlFlowNode[cfg.Length + 1];
for (int i = 0; i < cfg.Length; i++) {
rev[i] = new ControlFlowNode { UserIndex = i, UserData = cfg[i].UserData };
}
ControlFlowNode nodeTreatedAsExitNode = null;
bool multipleNodesTreatedAsExitNodes = false;
ControlFlowNode exitNode = new ControlFlowNode { UserIndex = -1 };
rev[cfg.Length] = exitNode;
for (int i = 0; i < cfg.Length; i++) {
if (!loopHead.Dominates(cfg[i]))
continue;
// Add reverse edges for all edges in cfg
foreach (var succ in cfg[i].Successors) {
if (loopHead.Dominates(succ) && (!treatBackEdgesAsExits || loopHead != succ)) {
rev[succ.UserIndex].AddEdgeTo(rev[i]);
} else {
if (nodeTreatedAsExitNode == null)
nodeTreatedAsExitNode = succ;
if (nodeTreatedAsExitNode != succ)
multipleNodesTreatedAsExitNodes = true;
exitNode.AddEdgeTo(rev[i]);
}
}
if (context.ControlFlowGraph.HasDirectExitOutOfContainer(cfg[i])) {
exitNode.AddEdgeTo(rev[i]);
}
}
if (multipleNodesTreatedAsExitNodes)
exitNodeArity = 2; // more than 1
else if (nodeTreatedAsExitNode != null)
exitNodeArity = 1;
else
exitNodeArity = 0;
Dominance.ComputeDominance(exitNode, context.CancellationToken);
return rev;
}
static bool IsPossibleForeachLoop(Block loopHead, out Branch exitBranch)
{
exitBranch = null;
var container = (BlockContainer)loopHead.Parent;
if (!(container.SlotInfo == TryInstruction.TryBlockSlot && container.Parent is TryFinally))
return false;
if (loopHead.Instructions.Count != 2)
return false;
if (!loopHead.Instructions[0].MatchIfInstruction(out var condition, out var trueInst))
return false;
var falseInst = loopHead.Instructions[1];
while (condition.MatchLogicNot(out var arg)) {
condition = arg;
ExtensionMethods.Swap(ref trueInst, ref falseInst);
}
if (!(condition is CallInstruction call && call.Method.Name == "MoveNext"))
return false;
if (!(call.Arguments.Count == 1 && call.Arguments[0].MatchLdLocRef(out var enumeratorVar)))
return false;
exitBranch = falseInst as Branch;
// Check that loopHead is entry-point of try-block:
Block entryPoint = container.EntryPoint;
while (entryPoint.IncomingEdgeCount == 1 && entryPoint.Instructions.Count == 1 && entryPoint.Instructions[0].MatchBranch(out var targetBlock)) {
// skip blocks that only branch to another block
entryPoint = targetBlock;
}
return entryPoint == loopHead;
}
#endregion
#region ExtendLoop (fall-back heuristic)
/// <summary>
/// This function implements a heuristic algorithm that tries to reduce the number of exit
/// points. It is only used as fall-back when it is impossible to use a single exit point.
/// </summary>
/// <remarks>
/// This heuristic loop extension algorithm traverses the loop head's dominator tree in pre-order.
/// For each candidate node, we detect whether adding it to the loop reduces the number of exit points.
/// If it does, the candidate is added to the loop.
///
/// Adding a node to the loop has two effects on the the number of exit points:
/// * exit points that were added to the loop are no longer exit points, thus reducing the total number of exit points
/// * successors of the newly added nodes might be new, additional exit points
///
/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
/// </remarks>
void ExtendLoopHeuristic(ControlFlowNode loopHead, List<ControlFlowNode> loop, ControlFlowNode candidate)
{
Debug.Assert(candidate.Visited == loop.Contains(candidate));
if (!candidate.Visited) {
// This node not yet part of the loop, but might be added
List<ControlFlowNode> additionalNodes = new List<ControlFlowNode>();
// Find additionalNodes nodes and mark them as visited.
candidate.TraversePreOrder(n => n.Predecessors, additionalNodes.Add);
// This means Visited now represents the candiate extended loop.
// Determine new exit points that are reachable from the additional nodes
// (note: some of these might have previously been exit points, too)
var newExitPoints = additionalNodes.SelectMany(n => n.Successors).Where(n => !n.Visited).ToHashSet();
// Make visited represent the unextended loop, so that we can measure the exit points
// in the old state.
foreach (var node in additionalNodes)
node.Visited = false;
// Measure number of added and removed exit points
int removedExitPoints = additionalNodes.Count(IsExitPoint);
int addedExitPoints = newExitPoints.Count(n => !IsExitPoint(n));
if (removedExitPoints > addedExitPoints) {
// We can reduce the number of exit points by adding the candidate node to the loop.
candidate.TraversePreOrder(n => n.Predecessors, loop.Add);
}
}
// Pre-order traversal of dominator tree
foreach (var node in candidate.DominatorTreeChildren) {
ExtendLoopHeuristic(loopHead, loop, node);
}
}
/// <summary>
/// Gets whether 'node' is an exit point for the loop marked by the Visited flag.
/// </summary>
bool IsExitPoint(ControlFlowNode node)
{
if (node.Visited)
return false; // nodes in the loop are not exit points
foreach (var pred in node.Predecessors) {
if (pred.Visited)
return true;
}
return false;
}
#endregion
/// <summary>
/// While our normal dominance logic ensures the loop has just a single reachable entry point,
/// it's possible that there are unreachable code blocks that have jumps into the loop.
/// We'll also include those into the loop.
///
/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
/// </summary>
private void IncludeUnreachablePredecessors(List<ControlFlowNode> loop)
{
for (int i = 1; i < loop.Count; i++) {
Debug.Assert(loop[i].Visited);
foreach (var pred in loop[i].Predecessors) {
if (!pred.Visited) {
if (pred.IsReachable) {
Debug.Fail("All jumps into the loop body should go through the entry point");
} else {
pred.Visited = true;
loop.Add(pred);
}
}
}
}
}
/// <summary>
/// Move the blocks associated with the loop into a new block container.
/// </summary>
void ConstructLoop(List<ControlFlowNode> loop, ControlFlowNode exitPoint)
{
Block oldEntryPoint = (Block)loop[0].UserData;
Block exitTargetBlock = (Block)exitPoint?.UserData;
BlockContainer loopContainer = new BlockContainer(ContainerKind.Loop);
Block newEntryPoint = new Block();
loopContainer.Blocks.Add(newEntryPoint);
// Move contents of oldEntryPoint to newEntryPoint
// (we can't move the block itself because it might be the target of branch instructions outside the loop)
newEntryPoint.Instructions.ReplaceList(oldEntryPoint.Instructions);
newEntryPoint.ILRange = oldEntryPoint.ILRange;
oldEntryPoint.Instructions.ReplaceList(new[] { loopContainer });
if (exitTargetBlock != null)
oldEntryPoint.Instructions.Add(new Branch(exitTargetBlock));
MoveBlocksIntoContainer(loop, loopContainer);
// Rewrite branches within the loop from oldEntryPoint to newEntryPoint:
foreach (var branch in loopContainer.Descendants.OfType<Branch>()) {
if (branch.TargetBlock == oldEntryPoint) {
branch.TargetBlock = newEntryPoint;
} else if (branch.TargetBlock == exitTargetBlock) {
branch.ReplaceWith(new Leave(loopContainer) { ILRange = branch.ILRange });
}
}
}
private void MoveBlocksIntoContainer(List<ControlFlowNode> loop, BlockContainer loopContainer)
{
// Move other blocks into the loop body: they're all dominated by the loop header,
// and thus cannot be the target of branch instructions outside the loop.
for (int i = 1; i < loop.Count; i++) {
Block block = (Block)loop[i].UserData;
// some blocks might already be in use by nested loops that were detected earlier;
// don't move those (they'll be implicitly moved when the block containing the
// nested loop container is moved).
if (block.Parent == currentBlockContainer) {
Debug.Assert(block.ChildIndex != 0);
int oldChildIndex = block.ChildIndex;
loopContainer.Blocks.Add(block);
currentBlockContainer.Blocks.SwapRemoveAt(oldChildIndex);
}
}
for (int i = 1; i < loop.Count; i++) {
// Verify that we moved all loop blocks into the loop container.
// If we wanted to move any blocks already in use by a nested loop,
// this means we check that the whole nested loop got moved.
Block block = (Block)loop[i].UserData;
Debug.Assert(block.IsDescendantOf(loopContainer));
}
}
private void DetectSwitchBody(Block block, SwitchInstruction switchInst)
{
Debug.Assert(block.Instructions.Last() == switchInst);
ControlFlowNode h = context.ControlFlowNode; // CFG node for our switch head
Debug.Assert(h.UserData == block);
Debug.Assert(!TreeTraversal.PreOrder(h, n => n.DominatorTreeChildren).Any(n => n.Visited));
var nodesInSwitch = new List<ControlFlowNode>();
nodesInSwitch.Add(h);
h.Visited = true;
ExtendLoop(h, nodesInSwitch, out var exitPoint, isSwitch: true);
if (exitPoint != null && exitPoint.Predecessors.Count == 1 && !context.ControlFlowGraph.HasReachableExit(exitPoint)) {
// If the exit point is reachable from just one single "break;",
// it's better to move the code into the switch.
// (unlike loops which should not be nested unless necessary,
// nesting switches makes it clearer in which cases a piece of code is reachable)
nodesInSwitch.AddRange(TreeTraversal.PreOrder(exitPoint, p => p.DominatorTreeChildren));
foreach (var node in nodesInSwitch) {
node.Visited = true;
}
exitPoint = null;
}
IncludeUnreachablePredecessors(nodesInSwitch);
context.Step("Create BlockContainer for switch", switchInst);
// Sort blocks in the loop in reverse post-order to make the output look a bit nicer.
// (if the loop doesn't contain nested loops, this is a topological sort)
nodesInSwitch.Sort((a, b) => b.PostOrderNumber.CompareTo(a.PostOrderNumber));
Debug.Assert(nodesInSwitch[0] == h);
foreach (var node in nodesInSwitch) {
node.Visited = false; // reset visited flag so that we can find outer loops
Debug.Assert(h.Dominates(node) || !node.IsReachable, "The switch body must be dominated by the switch head");
}
BlockContainer switchContainer = new BlockContainer(ContainerKind.Switch);
Block newEntryPoint = new Block();
newEntryPoint.ILRange = switchInst.ILRange;
switchContainer.Blocks.Add(newEntryPoint);
newEntryPoint.Instructions.Add(switchInst);
block.Instructions[block.Instructions.Count - 1] = switchContainer;
Block exitTargetBlock = (Block)exitPoint?.UserData;
if (exitTargetBlock != null) {
block.Instructions.Add(new Branch(exitTargetBlock));
}
MoveBlocksIntoContainer(nodesInSwitch, switchContainer);
// Rewrite branches within the loop from oldEntryPoint to newEntryPoint:
foreach (var branch in switchContainer.Descendants.OfType<Branch>()) {
if (branch.TargetBlock == exitTargetBlock) {
branch.ReplaceWith(new Leave(switchContainer) { ILRange = branch.ILRange });
}
}
}
}
}