ICSharpCode.Decompiler/IL/ControlFlow/LoopDetection.cs

﻿// Copyright (c) 2014 Daniel Grunwald
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
// 
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using ICSharpCode.Decompiler.FlowAnalysis;
using ICSharpCode.Decompiler.IL.Transforms;
using ICSharpCode.Decompiler.Util;

namespace ICSharpCode.Decompiler.IL.ControlFlow
{
	/// <summary>
	/// Detect loops in IL AST.
	/// </summary>
	/// <remarks>
	/// Transform ordering:
	/// * LoopDetection should run before other control flow structures are detected.
	/// * Blocks should be basic blocks (not extended basic blocks) so that the natural loops
	/// don't include more instructions than strictly necessary.
	/// * Loop detection should run after the 'return block' is duplicated (ControlFlowSimplification).
	/// </remarks>
	public class LoopDetection : IBlockTransform
	{
		BlockTransformContext context;
		
		/// <summary>Block container corresponding to the current cfg.</summary>
		BlockContainer currentBlockContainer;
		
		/// <summary>
		/// Check whether 'block' is a loop head; and construct a loop instruction
		/// (nested BlockContainer) if it is.
		/// </summary>
		public void Run(Block block, BlockTransformContext context)
		{
			this.context = context;
			// LoopDetection runs early enough so that block should still
			// be in the original container at this point.
			Debug.Assert(block.Parent == context.ControlFlowGraph.Container);
			this.currentBlockContainer = context.ControlFlowGraph.Container;

			// Because this is a post-order block transform, we can assume that
			// any nested loops within this loop have already been constructed.

			if (block.Instructions.Last() is SwitchInstruction switchInst) {
				// Switch instructions support "break;" just like loops
				DetectSwitchBody(block, switchInst);
			}

			ControlFlowNode h = context.ControlFlowNode; // CFG node for our potential loop head
			Debug.Assert(h.UserData == block);
			Debug.Assert(!TreeTraversal.PreOrder(h, n => n.DominatorTreeChildren).Any(n => n.Visited));

			List<ControlFlowNode> loop = null;
			foreach (var t in h.Predecessors) {
				if (h.Dominates(t)) {
					// h->t is a back edge, and h is a loop header
					// Add the natural loop of t->h to the loop.

					// Definitions:
					// * A back edge is an edge t->h so that h dominates t.
					// * The natural loop of the back edge is the smallest set of nodes
					//   that includes the back edge and has no predecessors outside the set
					//   except for the predecessor of the header.

					if (loop == null) {
						loop = new List<ControlFlowNode>();
						loop.Add(h);
						// Mark loop header as visited so that the pre-order traversal
						// stops at the loop header.
						h.Visited = true;
					}
					t.TraversePreOrder(n => n.Predecessors, loop.Add);
				}
			}
			if (loop != null) {
				var headBlock = (Block)h.UserData;
				context.Step($"Construct loop with head {headBlock.Label}", headBlock);
				// loop now is the union of all natural loops with loop head h.

				// Ensure any block included into nested loops is also considered part of this loop:
				IncludeNestedContainers(loop);
				// Try to extend the loop to reduce the number of exit points:
				ExtendLoop(h, loop, out var exitPoint);
				IncludeUnreachablePredecessors(loop);

				// Sort blocks in the loop in reverse post-order to make the output look a bit nicer.
				// (if the loop doesn't contain nested loops, this is a topological sort)
				loop.Sort((a, b) => b.PostOrderNumber.CompareTo(a.PostOrderNumber));
				Debug.Assert(loop[0] == h);
				foreach (var node in loop) {
					node.Visited = false; // reset visited flag so that we can find outer loops
					Debug.Assert(h.Dominates(node) || !node.IsReachable, "The loop body must be dominated by the loop head");
				}
				ConstructLoop(loop, exitPoint);
			}
		}

		/// <summary>
		/// For each block in the input loop that is the head of a nested loop or switch,
		/// include all blocks from the nested container into the loop.
		/// 
		/// This ensures that all blocks that were included into inner loops are also
		/// included into the outer loop, thus keeping our loops well-nested.
		/// </summary>
		/// <remarks>
		/// More details for why this is necessary are here:
		/// https://github.com/icsharpcode/ILSpy/issues/915
		/// 
		/// Pre+Post-Condition: node.Visited iff loop.Contains(node)
		/// </remarks>
		void IncludeNestedContainers(List<ControlFlowNode> loop)
		{
			for (int i = 0; i < loop.Count; i++) {
				IncludeBlock((Block)loop[i].UserData);
			}

			void IncludeBlock(Block block)
			{
				if (block.Instructions[0] is BlockContainer nestedContainer) {
					// Just in case the block has multiple nested containers (e.g. due to loop and switch),
					// also check the entry point:
					IncludeBlock(nestedContainer.EntryPoint);
					// Use normal processing for all non-entry-point blocks
					// (the entry-point itself doesn't have a CFG node, because it's newly created by this transform)
					for (int i = 1; i < nestedContainer.Blocks.Count; i++) {
						var node = context.ControlFlowGraph.GetNode(nestedContainer.Blocks[i]);
						Debug.Assert(loop[0].Dominates(node) || !node.IsReachable);
						if (!node.Visited) {
							node.Visited = true;
							loop.Add(node);
							// note: this block will be re-visited when the "i < loop.Count"
							// gets around to the new entry
						}
					}
				}
			}
		}

		#region ExtendLoop
		/// <summary>
		/// Given a natural loop, add additional CFG nodes to the loop in order
		/// to reduce the number of exit points out of the loop.
		/// We do this because C# only allows reaching a single exit point (with 'break'
		/// statements or when the loop condition evaluates to false), so we'd have
		/// to introduce 'goto' statements for any additional exit points.
		/// </summary>
		/// <remarks>
		/// Definition:
		/// A "reachable exit" is a branch/leave target that is reachable from the loop,
		/// but not dominated by the loop head. A reachable exit may or may not have a
		/// corresponding CFG node (depending on whether it is a block in the current block container).
		///   -> reachable exits are leaving the code region dominated by the loop
		/// 
		/// Definition:
		/// A loop "exit point" is a CFG node that is not itself part of the loop,
		/// but has at least one predecessor which is part of the loop.
		///   -> exit points are leaving the loop itself
		/// 
		/// Nodes can only be added to the loop if they are dominated by the loop head.
		/// When adding a node to the loop, we must also add all of that node's predecessors
		/// to the loop. (this ensures that the loop keeps its single entry point)
		/// 
		/// Goal: If possible, find a set of nodes that can be added to the loop so that there
		/// remains only a single exit point.
		/// Add as little code as possible to the loop to reach this goal.
		/// 
		/// This means we need to partition the set of nodes dominated by the loop entry point
		/// into two sets (in-loop and out-of-loop).
		/// Constraints:
		///  * the loop head itself is in-loop
		///  * there must not be any edge from an out-of-loop node to an in-loop node
		///    -> all predecessors of in-loop nodes are also in-loop
		///    -> all nodes in a cycle are part of the same partition
		/// Optimize:
		///  * use only a single exit point if at all possible
		///  * minimize the amount of code in the in-loop partition
		///    (thus: maximize the amount of code in the out-of-loop partition)
		///   "amount of code" could be measured as:
		///     * number of basic blocks
		///     * number of instructions directly in those basic blocks (~= number of statements)
		///     * number of instructions in those basic blocks (~= number of expressions)
		///       (we currently use the number of statements)
		/// 
		/// Observations:
		///  * If a node is in-loop, so are all its ancestors in the dominator tree (up to the loop entry point)
		///  * If there are no exits reachable from a node (i.e. all paths from that node lead to a return/throw instruction),
		///    it is valid to put the group of nodes dominated by that node into either partition independently of
		///    any other nodes except for the ancestors in the dominator tree.
		///       (exception: the loop head itself must always be in-loop)
		/// 
		/// There are two different cases we need to consider:
		/// 1) There are no exits reachable at all from the loop head.
		///    ->  it is possible to create a loop with zero exit points by adding all nodes
		///        dominated by the loop to the loop.
		///    -> the only way to exit the loop is by "return;" or "throw;"
		/// 2) There are some exits reachable from the loop head.
		/// 
		/// In case 1, we can pick a single exit point freely by picking any node that has no reachable exits
		/// (other than the loop head).
		/// All nodes dominated by the exit point are out-of-loop, all other nodes are in-loop.
		/// See PickExitPoint() for the heuristic that picks the exit point in this case.
		/// 
		/// In case 2, we need to pick our exit point so that all paths from the loop head
		/// to the reachable exits run through that exit point.
		/// 
		/// This is a form of postdominance where the reachable exits are considered exit nodes,
		/// while "return;" or "throw;" instructions are not considered exit nodes.
		/// 
		/// Using this form of postdominance, we are looking for an exit point that post-dominates all nodes in the natural loop.
		/// --> a common ancestor in post-dominator tree.
		/// To minimize the amount of code in-loop, we pick the lowest common ancestor.
		/// All nodes dominated by the exit point are out-of-loop, all other nodes are in-loop.
		/// (using normal dominance as in case 1, not post-dominance!)
		/// 
		/// If it is impossible to use a single exit point for the loop, the lowest common ancestor will be the fake "exit node"
		/// used by the post-dominance analysis. In this case, we fall back to the old heuristic algorithm.
		/// 
		/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
		/// </remarks>
		void ExtendLoop(ControlFlowNode loopHead, List<ControlFlowNode> loop, out ControlFlowNode exitPoint, bool isSwitch=false)
		{
			exitPoint = FindExitPoint(loopHead, loop, isSwitch);
			Debug.Assert(!loop.Contains(exitPoint), "Cannot pick an exit point that is part of the natural loop");
			if (exitPoint != null) {
				// Either we are in case 1 and just picked an exit that maximizes the amount of code
				// outside the loop, or we are in case 2 and found an exit point via post-dominance.
				// Note that if exitPoint == NoExitPoint, we end up adding all dominated blocks to the loop.
				var ep = exitPoint;
				foreach (var node in TreeTraversal.PreOrder(loopHead, n => (n != ep) ? n.DominatorTreeChildren : null)) {
					if (node != exitPoint && !node.Visited) {
						node.Visited = true;
						loop.Add(node);
					}
				}
			} else {
				// We are in case 2, but could not find a suitable exit point.
				// Heuristically try to minimize the number of exit points
				// (but we'll always end up with more than 1 exit and will require goto statements).
				ExtendLoopHeuristic(loopHead, loop, loopHead);
			}
		}

		/// <summary>
		/// Special control flow node (not part of any graph) that signifies that we want to construct a loop
		/// without any exit point.
		/// </summary>
		static readonly ControlFlowNode NoExitPoint = new ControlFlowNode();

		/// <summary>
		/// Finds a suitable single exit point for the specified loop.
		/// </summary>
		/// <returns>
		/// 1) If a suitable exit point was found: the control flow block that should be reached when breaking from the loop
		/// 2) If the loop should not have any exit point (extend by all dominated blocks): NoExitPoint
		/// 3) otherwise (exit point unknown, heuristically extend loop): null
		/// </returns>
		/// <remarks>This method must not write to the Visited flags on the CFG.</remarks>
		ControlFlowNode FindExitPoint(ControlFlowNode loopHead, IReadOnlyList<ControlFlowNode> naturalLoop, bool treatBackEdgesAsExits)
		{
			bool hasReachableExit = context.ControlFlowGraph.HasReachableExit(loopHead);
			if (!hasReachableExit && treatBackEdgesAsExits) {
				// If we're analyzing the switch, there's no reachable exit, but the loopHead (=switchHead) block
				// is also a loop head, we consider the back-edge a reachable exit for the switch.
				hasReachableExit = loopHead.Predecessors.Any(p => loopHead.Dominates(p));
			}
			if (!hasReachableExit) {
				// Case 1:
				// There are no nodes n so that loopHead dominates a predecessor of n but not n itself
				// -> we could build a loop with zero exit points.
				if (IsPossibleForeachLoop((Block)loopHead.UserData, out var exitBranch)) {
					if (exitBranch != null) {
						// let's see if the target of the exit branch is a suitable exit point
						var cfgNode = loopHead.Successors.FirstOrDefault(n => n.UserData == exitBranch.TargetBlock);
						if (cfgNode != null && loopHead.Dominates(cfgNode) && !context.ControlFlowGraph.HasReachableExit(cfgNode)) {
							return cfgNode;
						}
					}
					return NoExitPoint;
				}
				ControlFlowNode exitPoint = null;
				int exitPointILOffset = -1;
				foreach (var node in loopHead.DominatorTreeChildren) {
					PickExitPoint(node, ref exitPoint, ref exitPointILOffset);
				}
				return exitPoint;
			} else {
				// Case 2:
				// We need to pick our exit point so that all paths from the loop head
				// to the reachable exits run through that exit point.
				var cfg = context.ControlFlowGraph.cfg;
				var revCfg = PrepareReverseCFG(loopHead, treatBackEdgesAsExits, out int exitNodeArity);
				//ControlFlowNode.ExportGraph(cfg).Show("cfg");
				//ControlFlowNode.ExportGraph(revCfg).Show("rev");
				ControlFlowNode commonAncestor = revCfg[loopHead.UserIndex];
				Debug.Assert(commonAncestor.IsReachable);
				foreach (ControlFlowNode cfgNode in naturalLoop) {
					ControlFlowNode revNode = revCfg[cfgNode.UserIndex];
					if (revNode.IsReachable) {
						commonAncestor = Dominance.FindCommonDominator(commonAncestor, revNode);
					}
				}
				// All paths from within the loop to a reachable exit run through 'commonAncestor'.
				// However, this doesn't mean that 'commonAncestor' is valid as an exit point.
				// We walk up the post-dominator tree until we've got a valid exit point:
				ControlFlowNode exitPoint;
				while (commonAncestor.UserIndex >= 0) {
					exitPoint = cfg[commonAncestor.UserIndex];
					Debug.Assert(exitPoint.Visited == naturalLoop.Contains(exitPoint));
					// It's possible that 'commonAncestor' is itself part of the natural loop.
					// If so, it's not a valid exit point.
					if (!exitPoint.Visited && ValidateExitPoint(loopHead, exitPoint)) {
						// we found an exit point
						return exitPoint;
					}
					commonAncestor = commonAncestor.ImmediateDominator;
				}
				// least common post-dominator is the artificial exit node
				// This means we're in one of two cases:
				// * The loop might have multiple exit points.
				//     -> we should return null
				// * The loop has a single exit point that wasn't considered during post-dominance analysis.
				//        (which means the single exit isn't dominated by the loop head)
				//     -> we should return NoExitPoint so that all code dominated by the loop head is included into the loop
				if (exitNodeArity > 1) {
					return null;
				} else {
					// If exitNodeArity == 0, we should maybe look test if our exits out of the block container are all compatible?
					// but I don't think it hurts to have a bit too much code inside the loop in this rare case.
					return NoExitPoint;
				}
			}
		}

		/// <summary>
		/// Validates an exit point.
		/// 
		/// An exit point is invalid iff there is a node reachable from the exit point that
		/// is dominated by the loop head, but not by the exit point.
		/// (i.e. this method returns false iff the exit point's dominance frontier contains
		/// a node dominated by the loop head. but we implement this the slow way because
		/// we don't have dominance frontiers precomputed)
		/// </summary>
		/// <remarks>
		/// We need this because it's possible that there's a return block (thus reverse-unreachable node ignored by post-dominance)
		/// that is reachable both directly from the loop, and from the exit point.
		/// </remarks>
		bool ValidateExitPoint(ControlFlowNode loopHead, ControlFlowNode exitPoint)
		{
			var cfg = context.ControlFlowGraph;
			return IsValid(exitPoint);

			bool IsValid(ControlFlowNode node)
			{
				if (!cfg.HasReachableExit(node)) {
					// Optimization: if the dominance frontier is empty, we don't need
					// to check every node.
					return true;
				}
				foreach (var succ in node.Successors) {
					if (loopHead != succ && loopHead.Dominates(succ) && !exitPoint.Dominates(succ))
						return false;
				}
				foreach (var child in node.DominatorTreeChildren) {
					if (!IsValid(child))
						return false;
				}
				return true;
			}
		}

		/// <summary>
		/// Pick exit point by picking any node that has no reachable exits.
		/// 
		/// In the common case where the code was compiled with a compiler that emits IL code
		/// in source order (like the C# compiler), we can find the "real" exit point
		/// by simply picking the block with the highest IL offset.
		/// So let's do that instead of maximizing amount of code.
		/// </summary>
		/// <returns>Code amount in <paramref name="node"/> and its dominated nodes.</returns>
		/// <remarks>This method must not write to the Visited flags on the CFG.</remarks>
		void PickExitPoint(ControlFlowNode node, ref ControlFlowNode exitPoint, ref int exitPointILOffset)
		{
			Block block = (Block)node.UserData;
			if (block.ILRange.Start > exitPointILOffset
				&& !context.ControlFlowGraph.HasReachableExit(node)
				&& ((Block)node.UserData).Parent == currentBlockContainer)
			{
				// HasReachableExit(node) == false
				// -> there are no nodes n so that `node` dominates a predecessor of n but not n itself
				// -> there is no control flow out of `node` back into the loop, so it's usable as exit point

				// Additionally, we require that the block wasn't already moved into a nested loop,
				// since there's no way to jump into the middle of that loop when we need to exit.
				// NB: this is the only reason why we detect nested loops before outer loops:
				// If we detected the outer loop first, the outer loop might pick an exit point
				// that prevents us from finding a nice exit for the inner loops, causing
				// unnecessary gotos.
				exitPoint = node;
				exitPointILOffset = block.ILRange.Start;
				return; // don't visit children, they are likely to have even later IL offsets and we'd end up
				// moving almost all of the code into the loop.
			}
			foreach (var child in node.DominatorTreeChildren) {
				PickExitPoint(child, ref exitPoint, ref exitPointILOffset);
			}
		}

		/// <summary>
		/// Constructs a new control flow graph.
		/// Each node cfg[i] has a corresponding node rev[i].
		/// Edges are only created for nodes dominated by loopHead, and are in reverse from their direction
		/// in the primary CFG.
		/// An artificial exit node is used for edges that leave the set of nodes dominated by loopHead,
		/// or that leave the block Container.
		/// </summary>
		/// <param name="loopHead">Entry point of the loop.</param>
		/// <param name="treatBackEdgesAsExits">Whether to treat loop back edges as exit points.</param>
		/// <param name="exitNodeArity">out: The number of different CFG nodes.
		/// Possible values:
		///  0 = no CFG nodes used as exit nodes (although edges leaving the block container might still be exits);
		///  1 = a single CFG node (not dominated by loopHead) was used as an exit node;
		///  2 = more than one CFG node (not dominated by loopHead) was used as an exit node.
		/// </param>
		/// <returns></returns>
		ControlFlowNode[] PrepareReverseCFG(ControlFlowNode loopHead, bool treatBackEdgesAsExits, out int exitNodeArity)
		{
			ControlFlowNode[] cfg = context.ControlFlowGraph.cfg;
			ControlFlowNode[] rev = new ControlFlowNode[cfg.Length + 1];
			for (int i = 0; i < cfg.Length; i++) {
				rev[i] = new ControlFlowNode { UserIndex = i, UserData = cfg[i].UserData };
			}
			ControlFlowNode nodeTreatedAsExitNode = null;
			bool multipleNodesTreatedAsExitNodes = false;
			ControlFlowNode exitNode = new ControlFlowNode { UserIndex = -1 };
			rev[cfg.Length] = exitNode;
			for (int i = 0; i < cfg.Length; i++) {
				if (!loopHead.Dominates(cfg[i]))
					continue;
				// Add reverse edges for all edges in cfg
				foreach (var succ in cfg[i].Successors) {
					if (loopHead.Dominates(succ) && (!treatBackEdgesAsExits || loopHead != succ)) {
						rev[succ.UserIndex].AddEdgeTo(rev[i]);
					} else {
						if (nodeTreatedAsExitNode == null)
							nodeTreatedAsExitNode = succ;
						if (nodeTreatedAsExitNode != succ)
							multipleNodesTreatedAsExitNodes = true;
						exitNode.AddEdgeTo(rev[i]);
					}
				}
				if (context.ControlFlowGraph.HasDirectExitOutOfContainer(cfg[i])) {
					exitNode.AddEdgeTo(rev[i]);
				}
			}
			if (multipleNodesTreatedAsExitNodes)
				exitNodeArity = 2; // more than 1
			else if (nodeTreatedAsExitNode != null)
				exitNodeArity = 1;
			else
				exitNodeArity = 0;
			Dominance.ComputeDominance(exitNode, context.CancellationToken);
			return rev;
		}

		static bool IsPossibleForeachLoop(Block loopHead, out Branch exitBranch)
		{
			exitBranch = null;
			var container = (BlockContainer)loopHead.Parent;
			if (!(container.SlotInfo == TryInstruction.TryBlockSlot && container.Parent is TryFinally))
				return false;
			if (loopHead.Instructions.Count != 2)
				return false;
			if (!loopHead.Instructions[0].MatchIfInstruction(out var condition, out var trueInst))
				return false;
			var falseInst = loopHead.Instructions[1];
			while (condition.MatchLogicNot(out var arg)) {
				condition = arg;
				ExtensionMethods.Swap(ref trueInst, ref falseInst);
			}
			if (!(condition is CallInstruction call && call.Method.Name == "MoveNext"))
				return false;
			if (!(call.Arguments.Count == 1 && call.Arguments[0].MatchLdLocRef(out var enumeratorVar)))
				return false;
			exitBranch = falseInst as Branch;

			// Check that loopHead is entry-point of try-block:
			Block entryPoint = container.EntryPoint;
			while (entryPoint.IncomingEdgeCount == 1 && entryPoint.Instructions.Count == 1 && entryPoint.Instructions[0].MatchBranch(out var targetBlock)) {
				// skip blocks that only branch to another block
				entryPoint = targetBlock;
			}
			return entryPoint == loopHead;
		}
		#endregion

		#region ExtendLoop (fall-back heuristic)
		/// <summary>
		/// This function implements a heuristic algorithm that tries to reduce the number of exit
		/// points. It is only used as fall-back when it is impossible to use a single exit point.
		/// </summary>
		/// <remarks>
		/// This heuristic loop extension algorithm traverses the loop head's dominator tree in pre-order.
		/// For each candidate node, we detect whether adding it to the loop reduces the number of exit points.
		/// If it does, the candidate is added to the loop.
		/// 
		/// Adding a node to the loop has two effects on the the number of exit points:
		/// * exit points that were added to the loop are no longer exit points, thus reducing the total number of exit points
		/// * successors of the newly added nodes might be new, additional exit points
		/// 
		/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
		/// </remarks>
		void ExtendLoopHeuristic(ControlFlowNode loopHead, List<ControlFlowNode> loop, ControlFlowNode candidate)
		{
			Debug.Assert(candidate.Visited == loop.Contains(candidate));
			if (!candidate.Visited) {
				// This node not yet part of the loop, but might be added
				List<ControlFlowNode> additionalNodes = new List<ControlFlowNode>();
				// Find additionalNodes nodes and mark them as visited.
				candidate.TraversePreOrder(n => n.Predecessors, additionalNodes.Add);
				// This means Visited now represents the candiate extended loop.
				// Determine new exit points that are reachable from the additional nodes
				// (note: some of these might have previously been exit points, too)
				var newExitPoints = additionalNodes.SelectMany(n => n.Successors).Where(n => !n.Visited).ToHashSet();
				// Make visited represent the unextended loop, so that we can measure the exit points
				// in the old state.
				foreach (var node in additionalNodes)
					node.Visited = false;
				// Measure number of added and removed exit points
				int removedExitPoints = additionalNodes.Count(IsExitPoint);
				int addedExitPoints = newExitPoints.Count(n => !IsExitPoint(n));
				if (removedExitPoints > addedExitPoints) {
					// We can reduce the number of exit points by adding the candidate node to the loop.
					candidate.TraversePreOrder(n => n.Predecessors, loop.Add);
				}
			}
			// Pre-order traversal of dominator tree
			foreach (var node in candidate.DominatorTreeChildren) {
				ExtendLoopHeuristic(loopHead, loop, node);
			}
		}

		/// <summary>
		/// Gets whether 'node' is an exit point for the loop marked by the Visited flag.
		/// </summary>
		bool IsExitPoint(ControlFlowNode node)
		{
			if (node.Visited)
				return false; // nodes in the loop are not exit points
			foreach (var pred in node.Predecessors) {
				if (pred.Visited)
					return true;
			}
			return false;
		}
		#endregion

		/// <summary>
		/// While our normal dominance logic ensures the loop has just a single reachable entry point,
		/// it's possible that there are unreachable code blocks that have jumps into the loop.
		/// We'll also include those into the loop.
		/// 
		/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
		/// </summary>
		private void IncludeUnreachablePredecessors(List<ControlFlowNode> loop)
		{
			for (int i = 1; i < loop.Count; i++) {
				Debug.Assert(loop[i].Visited);
				foreach (var pred in loop[i].Predecessors) {
					if (!pred.Visited) {
						if (pred.IsReachable) {
							Debug.Fail("All jumps into the loop body should go through the entry point");
						} else {
							pred.Visited = true;
							loop.Add(pred);
						}
					}
				}
			}
		}

		/// <summary>
		/// Move the blocks associated with the loop into a new block container.
		/// </summary>
		void ConstructLoop(List<ControlFlowNode> loop, ControlFlowNode exitPoint)
		{
			Block oldEntryPoint = (Block)loop[0].UserData;
			Block exitTargetBlock = (Block)exitPoint?.UserData;

			BlockContainer loopContainer = new BlockContainer(ContainerKind.Loop);
			Block newEntryPoint = new Block();
			loopContainer.Blocks.Add(newEntryPoint);
			// Move contents of oldEntryPoint to newEntryPoint
			// (we can't move the block itself because it might be the target of branch instructions outside the loop)
			newEntryPoint.Instructions.ReplaceList(oldEntryPoint.Instructions);
			newEntryPoint.ILRange = oldEntryPoint.ILRange;
			oldEntryPoint.Instructions.ReplaceList(new[] { loopContainer });
			if (exitTargetBlock != null)
				oldEntryPoint.Instructions.Add(new Branch(exitTargetBlock));

			MoveBlocksIntoContainer(loop, loopContainer);

			// Rewrite branches within the loop from oldEntryPoint to newEntryPoint:
			foreach (var branch in loopContainer.Descendants.OfType<Branch>()) {
				if (branch.TargetBlock == oldEntryPoint) {
					branch.TargetBlock = newEntryPoint;
				} else if (branch.TargetBlock == exitTargetBlock) {
					branch.ReplaceWith(new Leave(loopContainer) { ILRange = branch.ILRange });
				}
			}
		}

		private void MoveBlocksIntoContainer(List<ControlFlowNode> loop, BlockContainer loopContainer)
		{
			// Move other blocks into the loop body: they're all dominated by the loop header,
			// and thus cannot be the target of branch instructions outside the loop.
			for (int i = 1; i < loop.Count; i++) {
				Block block = (Block)loop[i].UserData;
				// some blocks might already be in use by nested loops that were detected earlier;
				// don't move those (they'll be implicitly moved when the block containing the
				// nested loop container is moved).
				if (block.Parent == currentBlockContainer) {
					Debug.Assert(block.ChildIndex != 0);
					int oldChildIndex = block.ChildIndex;
					loopContainer.Blocks.Add(block);
					currentBlockContainer.Blocks.SwapRemoveAt(oldChildIndex);
				}
			}
			for (int i = 1; i < loop.Count; i++) {
				// Verify that we moved all loop blocks into the loop container.
				// If we wanted to move any blocks already in use by a nested loop,
				// this means we check that the whole nested loop got moved.
				Block block = (Block)loop[i].UserData;
				Debug.Assert(block.IsDescendantOf(loopContainer));
			}
		}

		private void DetectSwitchBody(Block block, SwitchInstruction switchInst)
		{
			Debug.Assert(block.Instructions.Last() == switchInst);
			ControlFlowNode h = context.ControlFlowNode; // CFG node for our switch head
			Debug.Assert(h.UserData == block);
			Debug.Assert(!TreeTraversal.PreOrder(h, n => n.DominatorTreeChildren).Any(n => n.Visited));

			var nodesInSwitch = new List<ControlFlowNode>();
			nodesInSwitch.Add(h);
			h.Visited = true;
			ExtendLoop(h, nodesInSwitch, out var exitPoint, isSwitch: true);
			if (exitPoint != null && exitPoint.Predecessors.Count == 1 && !context.ControlFlowGraph.HasReachableExit(exitPoint)) {
				// If the exit point is reachable from just one single "break;",
				// it's better to move the code into the switch.
				// (unlike loops which should not be nested unless necessary,
				//  nesting switches makes it clearer in which cases a piece of code is reachable)
				nodesInSwitch.AddRange(TreeTraversal.PreOrder(exitPoint, p => p.DominatorTreeChildren));
				foreach (var node in nodesInSwitch) {
					node.Visited = true;
				}
				exitPoint = null;
			}
			IncludeUnreachablePredecessors(nodesInSwitch);

			context.Step("Create BlockContainer for switch", switchInst);
			// Sort blocks in the loop in reverse post-order to make the output look a bit nicer.
			// (if the loop doesn't contain nested loops, this is a topological sort)
			nodesInSwitch.Sort((a, b) => b.PostOrderNumber.CompareTo(a.PostOrderNumber));
			Debug.Assert(nodesInSwitch[0] == h);
			foreach (var node in nodesInSwitch) {
				node.Visited = false; // reset visited flag so that we can find outer loops
				Debug.Assert(h.Dominates(node) || !node.IsReachable, "The switch body must be dominated by the switch head");
			}

			BlockContainer switchContainer = new BlockContainer(ContainerKind.Switch);
			Block newEntryPoint = new Block();
			newEntryPoint.ILRange = switchInst.ILRange;
			switchContainer.Blocks.Add(newEntryPoint);
			newEntryPoint.Instructions.Add(switchInst);
			block.Instructions[block.Instructions.Count - 1] = switchContainer;

			Block exitTargetBlock = (Block)exitPoint?.UserData;
			if (exitTargetBlock != null) {
				block.Instructions.Add(new Branch(exitTargetBlock));
			}

			MoveBlocksIntoContainer(nodesInSwitch, switchContainer);

			// Rewrite branches within the loop from oldEntryPoint to newEntryPoint:
			foreach (var branch in switchContainer.Descendants.OfType<Branch>()) {
				if (branch.TargetBlock == exitTargetBlock) {
					branch.ReplaceWith(new Leave(switchContainer) { ILRange = branch.ILRange });
				}
			}
		}
	}
}