// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX XX
XX Optimizer XX
XX XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#pragma warning(disable : 4701)
#endif
/*****************************************************************************/
void Compiler::optInit()
{
optLoopsMarked = false;
fgHasLoops = false;
/* Initialize the # of tracked loops to 0 */
optLoopCount = 0;
optLoopTable = nullptr;
/* Keep track of the number of calls and indirect calls made by this method */
optCallCount = 0;
optIndirectCallCount = 0;
optNativeCallCount = 0;
optAssertionCount = 0;
optAssertionDep = nullptr;
#if FEATURE_ANYCSE
optCSECandidateTotal = 0;
optCSEstart = UINT_MAX;
optCSEcount = 0;
#endif // FEATURE_ANYCSE
}
DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
{
}
/*****************************************************************************
*
* Set the block weights: blocks unreachable from the first block are marked
* as run rarely, and (absent profile data) blocks that do not dominate all
* BBJ_RETURN blocks have their weight halved.
*/
void Compiler::optSetBlockWeights()
{
noway_assert(!opts.MinOpts() && !opts.compDbgCode);
assert(fgDomsComputed);
#ifdef DEBUG
bool changed = false;
#endif
bool firstBBdomsRets = true;
BasicBlock* block;
for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
{
/* Blocks that can't be reached via the first block are rarely executed */
if (!fgReachable(fgFirstBB, block))
{
block->bbSetRunRarely();
}
if (block->bbWeight != BB_ZERO_WEIGHT)
{
// Calculate our bbWeight:
//
// o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
// o otherwise BB_UNITY_WEIGHT / 2
//
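// Illustrative arithmetic (an assumed example, not from the source):
// with BB_UNITY_WEIGHT at, say, 100, a non-profiled block that fails to
// dominate every BBJ_RETURN block is halved from 100 to 50 below
// (provided fgFirstBB itself dominates the returns); profile-weighted
// runs skip the adjustment entirely.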
bool domsRets = true; // Assume that we will dominate
for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
{
if (!fgDominate(block, retBlocks->block))
{
domsRets = false;
break;
}
}
if (block == fgFirstBB)
{
firstBBdomsRets = domsRets;
}
// If we are not using profile weight then we lower the weight
// of blocks that do not dominate a return block
//
if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
{
#if DEBUG
changed = true;
#endif
block->modifyBBWeight(block->bbWeight / 2);
noway_assert(block->bbWeight);
}
}
}
#if DEBUG
if (changed && verbose)
{
printf("\nAfter optSetBlockWeights:\n");
fgDispBasicBlocks();
printf("\n");
}
/* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
fgDebugCheckBBlist();
#endif
}
/*****************************************************************************
*
* Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
*/
void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
{
/* Calculate the 'loopWeight':
this is the amount by which to increase each block in the loop.
Our heuristic is that loops are weighted eight times more
than straight-line code.
Thus we increase each block by 7 times the weight of
the loop header block, which, if the loops are all properly
formed, gives us:
(assuming that BB_LOOP_WEIGHT is 8)
1 -- non-loop basic block
8 -- single loop nesting
64 -- double loop nesting
512 -- triple loop nesting
*/
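/* Worked example (illustrative): a block in a doubly-nested loop that
dominates the backedges of both loops is scaled by BB_LOOP_WEIGHT
once per nesting level: 1 * 8 * 8 = 64. If it only reaches (but does
not dominate) the inner backedge, the inner multiplier drops to
BB_LOOP_WEIGHT / 2, giving 1 * 8 * 4 = 32. */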
noway_assert(begBlk->bbNum <= endBlk->bbNum);
noway_assert(begBlk->isLoopHead());
noway_assert(fgReachable(begBlk, endBlk));
#ifdef DEBUG
if (verbose)
{
printf("\nMarking loop L%02u", begBlk->bbLoopNum);
}
#endif
noway_assert(!opts.MinOpts());
/* Build list of backedges for block begBlk */
flowList* backedgeList = nullptr;
for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
{
/* Is this a backedge? */
if (pred->flBlock->bbNum >= begBlk->bbNum)
{
flowList* flow = new (this, CMK_FlowList) flowList();
#if MEASURE_BLOCK_SIZE
genFlowNodeCnt += 1;
genFlowNodeSize += sizeof(flowList);
#endif // MEASURE_BLOCK_SIZE
flow->flNext = backedgeList;
flow->flBlock = pred->flBlock;
backedgeList = flow;
}
}
/* At least one backedge must have been found (the one from endBlk) */
noway_assert(backedgeList);
BasicBlock* curBlk = begBlk;
while (true)
{
noway_assert(curBlk);
// For curBlk to be part of a loop that starts at begBlk
// curBlk must be reachable from begBlk and (since this is a loop)
// likewise begBlk must be reachable from curBlk.
//
if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
{
/* If this block reaches any of the backedge blocks we set reachable */
/* If this block dominates any of the backedge blocks we set dominates */
bool reachable = false;
bool dominates = false;
for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
{
BasicBlock* backedge = tmp->flBlock;
if (!curBlk->isRunRarely())
{
reachable |= fgReachable(curBlk, backedge);
dominates |= fgDominate(curBlk, backedge);
if (dominates && reachable)
{
break;
}
}
}
if (reachable)
{
noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
unsigned weight;
if (curBlk->hasProfileWeight())
{
// We have real profile weights, so we aren't going to change this block's weight
weight = curBlk->bbWeight;
}
else
{
if (dominates)
{
weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
}
else
{
weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
}
//
// The multiplication may have caused us to overflow
//
if (weight < curBlk->bbWeight)
{
// The multiplication caused us to overflow
weight = BB_MAX_WEIGHT;
}
//
// Set the new weight
//
curBlk->modifyBBWeight(weight);
}
#ifdef DEBUG
if (verbose)
{
printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
}
#endif
}
}
/* Stop if we've reached the last block in the loop */
if (curBlk == endBlk)
{
break;
}
curBlk = curBlk->bbNext;
/* If we are excluding the endBlk then stop if we've reached endBlk */
if (excludeEndBlk && (curBlk == endBlk))
{
break;
}
}
}
/*****************************************************************************
*
* Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
*/
void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
{
/* A set of blocks that were previously marked as a loop are now
to be unmarked, since we have decided that for some reason this
loop no longer exists.
Basically we are just resetting the blocks' bbWeight to their
previous values.
*/
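/* Illustrative arithmetic (assuming BB_LOOP_WEIGHT is 8): a block that
optMarkLoopBlocks scaled to 4 (reachable but not dominating, 1 * 8/2)
is doubled back to 8 and then divided by BB_LOOP_WEIGHT below,
restoring its original weight of 1. */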
noway_assert(begBlk->bbNum <= endBlk->bbNum);
noway_assert(begBlk->isLoopHead());
noway_assert(!opts.MinOpts());
BasicBlock* curBlk;
unsigned backEdgeCount = 0;
for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
{
curBlk = pred->flBlock;
/* is this a backward edge? (from curBlk to begBlk) */
if (begBlk->bbNum > curBlk->bbNum)
{
continue;
}
/* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
{
continue;
}
backEdgeCount++;
}
/* Only unmark the loop blocks if we have exactly one loop back edge */
if (backEdgeCount != 1)
{
#ifdef DEBUG
if (verbose)
{
if (backEdgeCount > 0)
{
printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
}
else if (backEdgeCount == 0)
{
printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
}
}
#endif
return;
}
noway_assert(backEdgeCount == 1);
noway_assert(fgReachable(begBlk, endBlk));
#ifdef DEBUG
if (verbose)
{
printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
}
#endif
curBlk = begBlk;
while (true)
{
noway_assert(curBlk);
// For curBlk to be part of a loop that starts at begBlk
// curBlk must be reachable from begBlk and (since this is a loop)
// likewise begBlk must be reachable from curBlk.
//
if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
{
unsigned weight = curBlk->bbWeight;
// Don't unmark blocks that are set to BB_MAX_WEIGHT
// Don't unmark blocks when we are using profile weights
//
if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight())
{
if (!fgDominate(curBlk, endBlk))
{
weight *= 2;
}
else
{
/* Merging of blocks can disturb the Dominates
information (see RAID #46649) */
if (weight < BB_LOOP_WEIGHT)
{
weight *= 2;
}
}
// We can overflow here so check for it
if (weight < curBlk->bbWeight)
{
weight = BB_MAX_WEIGHT;
}
assert(weight >= BB_LOOP_WEIGHT);
curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
}
#ifdef DEBUG
if (verbose)
{
printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
}
#endif
}
/* Stop if we've reached the last block in the loop */
if (curBlk == endBlk)
{
break;
}
curBlk = curBlk->bbNext;
/* Stop if we go past the last block in the loop, as it may have been deleted */
if (curBlk->bbNum > endBlk->bbNum)
{
break;
}
}
}
/*****************************************************************************************************
*
* Function called to update the loop table and bbWeight before removing a block
*/
void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
{
if (!optLoopsMarked)
{
return;
}
noway_assert(!opts.MinOpts());
bool removeLoop = false;
/* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
/* Special case: the block was the head of a loop - or pointing to a loop entry */
for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
{
/* Some loops may have already been removed by
* loop unrolling or conditional folding */
if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
{
continue;
}
if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
{
optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
continue;
}
#ifdef DEBUG
if (verbose)
{
printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
optPrintLoopInfo(loopNum);
}
#endif
/* If the loop is still in the table
* any block in the loop must be reachable !!! */
noway_assert(optLoopTable[loopNum].lpEntry != block);
noway_assert(optLoopTable[loopNum].lpBottom != block);
if (optLoopTable[loopNum].lpExit == block)
{
optLoopTable[loopNum].lpExit = nullptr;
optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
}
/* If this points to the actual entry in the loop
* then the whole loop may become unreachable */
switch (block->bbJumpKind)
{
unsigned jumpCnt;
BasicBlock** jumpTab;
case BBJ_NONE:
case BBJ_COND:
if (block->bbNext == optLoopTable[loopNum].lpEntry)
{
removeLoop = true;
break;
}
if (block->bbJumpKind == BBJ_NONE)
{
break;
}
__fallthrough;
case BBJ_ALWAYS:
noway_assert(block->bbJumpDest);
if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
{
removeLoop = true;
}
break;
case BBJ_SWITCH:
jumpCnt = block->bbJumpSwt->bbsCount;
jumpTab = block->bbJumpSwt->bbsDstTab;
do
{
noway_assert(*jumpTab);
if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
{
removeLoop = true;
}
} while (++jumpTab, --jumpCnt);
break;
default:
break;
}
if (removeLoop)
{
/* Check if the entry has other predecessors outside the loop
* TODO: Replace this when predecessors are available */
BasicBlock* auxBlock;
for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
{
/* Ignore blocks in the loop */
if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
{
continue;
}
switch (auxBlock->bbJumpKind)
{
unsigned jumpCnt;
BasicBlock** jumpTab;
case BBJ_NONE:
case BBJ_COND:
if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
{
removeLoop = false;
break;
}
if (auxBlock->bbJumpKind == BBJ_NONE)
{
break;
}
__fallthrough;
case BBJ_ALWAYS:
noway_assert(auxBlock->bbJumpDest);
if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
{
removeLoop = false;
}
break;
case BBJ_SWITCH:
jumpCnt = auxBlock->bbJumpSwt->bbsCount;
jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
do
{
noway_assert(*jumpTab);
if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
{
removeLoop = false;
}
} while (++jumpTab, --jumpCnt);
break;
default:
break;
}
}
if (removeLoop)
{
optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
}
}
else if (optLoopTable[loopNum].lpHead == block)
{
/* The loop has a new head - Just update the loop table */
optLoopTable[loopNum].lpHead = block->bbPrev;
}
#ifdef DEBUG
if (verbose)
{
printf("\nUpdateLoopsBeforeRemoveBlock After: ");
optPrintLoopInfo(loopNum);
}
#endif
}
if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
(block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
(fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
{
optUnmarkLoopBlocks(block->bbJumpDest, block);
}
}
#ifdef DEBUG
/*****************************************************************************
*
* Given the beginBlock of the loop, return the index of this loop
* in the loop table.
*/
unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
{
unsigned lnum = 0;
for (lnum = 0; lnum < optLoopCount; lnum++)
{
if (optLoopTable[lnum].lpHead->bbNext == begBlk)
{
// Found the loop.
return lnum;
}
}
noway_assert(!"Loop number not found.");
return optLoopCount;
}
/*****************************************************************************
*
* Print loop info in a uniform way.
*/
void Compiler::optPrintLoopInfo(unsigned loopInd,
BasicBlock* lpHead,
BasicBlock* lpFirst,
BasicBlock* lpTop,
BasicBlock* lpEntry,
BasicBlock* lpBottom,
unsigned char lpExitCnt,
BasicBlock* lpExit,
unsigned parentLoop)
{
noway_assert(lpHead);
//
// NOTE: we take "loopInd" as an argument instead of using the one
// stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
// has not been set correctly. For example, in optRecordLoop().
// However, in most cases, loops should have been recorded.
// Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
// version of this method.
//
printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
if (lpTop != lpFirst)
{
printf(" (loop top is BB%02u)", lpTop->bbNum);
}
printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
lpExitCnt);
if (lpExitCnt == 1)
{
printf(" at BB%02u", lpExit->bbNum);
}
if (parentLoop != BasicBlock::NOT_IN_LOOP)
{
printf(", parent loop = L%02u", parentLoop);
}
printf(")");
}
/*****************************************************************************
*
* Print loop information given the index of the loop in the loop table.
*/
void Compiler::optPrintLoopInfo(unsigned lnum)
{
noway_assert(lnum < optLoopCount);
LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
ldsc->lpExit, ldsc->lpParent);
}
#endif
//------------------------------------------------------------------------
// optPopulateInitInfo: Populate loop init info in the loop table.
//
// Arguments:
// init - the tree that is supposed to initialize the loop iterator.
// iterVar - loop iteration variable.
//
// Return Value:
// "false" if the loop table could not be populated with the loop iterVar init info.
//
// Operation:
// The 'init' tree is checked to see whether its lhs is a local matching
// "iterVar" and its rhs is either a constant or a local.
//
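// For example (illustrative trees): an init of "i = 0" sets
// LPFLG_CONST_INIT and records lpConstInit = 0, while "i = j" sets
// LPFLG_VAR_INIT and records j's local number in lpVarInit; any other
// rhs shape makes this method return false.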
bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTree* init, unsigned iterVar)
{
// Operator should be =
if (init->gtOper != GT_ASG)
{
return false;
}
GenTree* lhs = init->gtOp.gtOp1;
GenTree* rhs = init->gtOp.gtOp2;
// LHS has to be local and should equal iterVar.
if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
{
return false;
}
// RHS can be constant or local var.
// TODO-CQ: CLONE: Add arr length for descending loops.
if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
{
optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
}
else if (rhs->gtOper == GT_LCL_VAR)
{
optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
}
else
{
return false;
}
return true;
}
//----------------------------------------------------------------------------------
// optCheckIterInLoopTest: Check if iter var is used in loop test.
//
// Arguments:
// test - "jtrue" tree or an asg of the loop iter termination condition
// from, to - blocks (beg, end) which are part of the loop.
// iterVar - loop iteration variable.
// loopInd - loop index.
//
// Operation:
// The test tree is parsed to check if "iterVar" matches the lhs of the condition
// and the rhs limit is extracted from the "test" tree. The limit information is
// added to the loop table.
//
// Return Value:
// "false" if the loop table could not be populated with the loop test info or
// if the test condition doesn't involve iterVar.
//
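// For example (illustrative): with iterVar "i", a test of "i < 10" sets
// LPFLG_CONST_LIMIT, "i < n" sets LPFLG_VAR_LIMIT when n is not assigned
// inside the loop, and "i < a.Length" sets LPFLG_ARRLEN_LIMIT; any other
// limit shape makes this method return false.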
bool Compiler::optCheckIterInLoopTest(
unsigned loopInd, GenTree* test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
{
// Obtain the relop from the "test" tree.
GenTree* relop;
if (test->gtOper == GT_JTRUE)
{
relop = test->gtGetOp1();
}
else
{
assert(test->gtOper == GT_ASG);
relop = test->gtGetOp2();
}
noway_assert(relop->OperKind() & GTK_RELOP);
GenTree* opr1 = relop->gtOp.gtOp1;
GenTree* opr2 = relop->gtOp.gtOp2;
GenTree* iterOp;
GenTree* limitOp;
// Make sure op1 or op2 is the iterVar.
if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
{
iterOp = opr1;
limitOp = opr2;
}
else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
{
iterOp = opr2;
limitOp = opr1;
}
else
{
return false;
}
if (iterOp->gtType != TYP_INT)
{
return false;
}
// Mark the iterator node.
iterOp->gtFlags |= GTF_VAR_ITERATOR;
// Check what type of limit we have - constant, variable or arr-len.
if (limitOp->gtOper == GT_CNS_INT)
{
optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
{
optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
}
}
else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
{
optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
}
else if (limitOp->gtOper == GT_ARR_LENGTH)
{
optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
}
else
{
return false;
}
// Save the type of the comparison between the iterator and the limit.
optLoopTable[loopInd].lpTestTree = relop;
return true;
}
//----------------------------------------------------------------------------------
// optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
//
// Arguments:
// incr The incr tree to be checked. Whether incr tree is
// oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
//
// Operation:
// The test tree is parsed to check if "iterVar" matches the lhs of the condition
// and the rhs limit is extracted from the "test" tree. The limit information is
// added to the loop table.
//
// Return Value:
// iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
//
unsigned Compiler::optIsLoopIncrTree(GenTree* incr)
{
GenTree* incrVal;
genTreeOps updateOper;
unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
if (iterVar != BAD_VAR_NUM)
{
// We have v = v op y type asg node.
switch (updateOper)
{
case GT_ADD:
case GT_SUB:
case GT_MUL:
case GT_RSH:
case GT_LSH:
break;
default:
return BAD_VAR_NUM;
}
// Increment should be by a const int.
// TODO-CQ: CLONE: allow variable increments.
if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
{
return BAD_VAR_NUM;
}
}
return iterVar;
}
//----------------------------------------------------------------------------------
// optComputeIterInfo: Check that the tree is a loop increment of a local whose only assignment in the loop is that increment.
//
// Arguments:
// from, to - are blocks (beg, end) which are part of the loop.
// incr - tree that increments the loop iterator. v+=1 or v=v+1.
// pIterVar - see return value.
//
// Return Value:
// Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
// false.
//
// Operation:
// Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
// assigned in the loop.
//
bool Compiler::optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
{
unsigned iterVar = optIsLoopIncrTree(incr);
if (iterVar == BAD_VAR_NUM)
{
return false;
}
if (optIsVarAssigned(from, to, incr, iterVar))
{
JITDUMP("iterVar is assigned in loop\n");
return false;
}
*pIterVar = iterVar;
return true;
}
//----------------------------------------------------------------------------------
// optIsLoopTestEvalIntoTemp:
// Pattern match to check whether the test tree is computed into a tmp
// and that "tmp" is used as the jump condition for loop termination.
//
// Arguments:
// testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
// where Vtmp contains the actual loop test result.
// newStmt - contains the statement that is the actual test stmt involving
// the loop iterator.
//
// Return Value:
// Returns true if a new test tree can be obtained.
//
// Operation:
// Check whether the current stmt is a jtrue with (Vtmp != 0) as its condition;
// if so, return the rhs of the def of Vtmp as the "test" node.
//
// Note:
// This method just retrieves what it thinks is the "test" node,
// the callers are expected to verify that "iterVar" is used in the test.
//
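// For example (illustrative statements):
// Vtmp = (V01 < V02); // prev stmt: the actual loop test
// jmpTrue (Vtmp != 0) // testStmt: jump on the temp
// Here the prev stmt is returned through "newTest" as the real test tree.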
bool Compiler::optIsLoopTestEvalIntoTemp(GenTree* testStmt, GenTree** newTest)
{
GenTree* test = testStmt->gtStmt.gtStmtExpr;
if (test->gtOper != GT_JTRUE)
{
return false;
}
GenTree* relop = test->gtGetOp1();
noway_assert(relop->OperIsCompare());
GenTree* opr1 = relop->gtOp.gtOp1;
GenTree* opr2 = relop->gtOp.gtOp2;
// Make sure we have jtrue (vtmp != 0)
if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
opr2->IsIntegralConst(0))
{
// Get the previous statement to get the def (rhs) of Vtmp to see
// if the "test" is evaluated into Vtmp.
GenTree* prevStmt = testStmt->gtPrev;
if (prevStmt == nullptr)
{
return false;
}
GenTree* tree = prevStmt->gtStmt.gtStmtExpr;
if (tree->OperGet() == GT_ASG)
{
GenTree* lhs = tree->gtOp.gtOp1;
GenTree* rhs = tree->gtOp.gtOp2;
// Return as the new test node.
if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
{
if (rhs->OperIsCompare())
{
*newTest = prevStmt;
return true;
}
}
}
}
return false;
}
//----------------------------------------------------------------------------------
// optExtractInitTestIncr:
// Extract the "init", "test" and "incr" nodes of the loop.
//
// Arguments:
// head - Loop head block
// bottom - Loop bottom block
// top - Loop top block
// ppInit - The init stmt of the loop if found.
// ppTest - The test stmt of the loop if found.
// ppIncr - The incr stmt of the loop if found.
//
// Return Value:
// The results are put in "ppInit", "ppTest" and "ppIncr" if the method
// returns true. Returns false if the information can't be extracted.
//
// Operation:
// Check if the "test" stmt is the last stmt in the loop "bottom"; if so, the
// "test" stmt has been found. Then try to find the "incr" stmt by checking
// the stmt preceding "test". If it is not found there, the loop could have
// the form below.
//
// +-------<-----------------<-----------+
// | |
// v |
// BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
//
// Check if the "incr" tree is present in the loop "top" node as the last stmt.
// Also check if the "test" tree is assigned to a tmp node and the tmp is used
// in the jtrue condition.
//
// Note:
// This method just retrieves what it thinks is the "test" node,
// the callers are expected to verify that "iterVar" is used in the test.
//
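// For example (illustrative), in the common do-while shape the "bottom"
// block is expected to end with the pair:
// i = i + 1 // incr: second-to-last stmt
// jmpTrue (i < n) // test: last stmt
// while "init" (e.g. "i = 0") is the last stmt of the pre-header "head".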
bool Compiler::optExtractInitTestIncr(
BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr)
{
assert(ppInit != nullptr);
assert(ppTest != nullptr);
assert(ppIncr != nullptr);
// Check if last two statements in the loop body are the increment of the iterator
// and the loop termination test.
noway_assert(bottom->bbTreeList != nullptr);
GenTree* test = bottom->bbTreeList->gtPrev;
noway_assert(test != nullptr && test->gtNext == nullptr);
GenTree* newTest;
if (optIsLoopTestEvalIntoTemp(test, &newTest))
{
test = newTest;
}
// Check if we have the incr tree before the test tree; if we don't,
// check if incr is part of the loop "top".
GenTree* incr = test->gtPrev;
if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
{
if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
{
return false;
}
// If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
GenTree* topLast = top->bbTreeList->gtPrev;
if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
{
incr = topLast;
}
else
{
return false;
}
}
assert(test != incr);
// Find the last statement in the loop pre-header which we expect to be the initialization of
// the loop iterator.
GenTree* phdr = head->bbTreeList;
if (phdr == nullptr)
{
return false;
}
GenTree* init = phdr->gtPrev;
noway_assert(init != nullptr && (init->gtNext == nullptr));
// If it is a duplicated loop condition, skip it.
if (init->gtFlags & GTF_STMT_CMPADD)
{
bool doGetPrev = true;
#ifdef DEBUG
if (opts.optRepeat)
{
// Previous optimization passes may have inserted compiler-generated
// statements other than duplicated loop conditions.
doGetPrev = (init->gtPrev != nullptr);
}
else
{
// Must be a duplicated loop condition.
noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
}
#endif // DEBUG
if (doGetPrev)
{
init = init->gtPrev;
}
noway_assert(init != nullptr);
}
noway_assert(init->gtOper == GT_STMT);
noway_assert(test->gtOper == GT_STMT);
noway_assert(incr->gtOper == GT_STMT);
*ppInit = init->gtStmt.gtStmtExpr;
*ppTest = test->gtStmt.gtStmtExpr;
*ppIncr = incr->gtStmt.gtStmtExpr;
return true;
}
/*****************************************************************************
*
* Record the loop in the loop table. Return true if successful, false if
* out of entries in loop table.
*/
bool Compiler::optRecordLoop(BasicBlock* head,
BasicBlock* first,
BasicBlock* top,
BasicBlock* entry,
BasicBlock* bottom,
BasicBlock* exit,
unsigned char exitCnt)
{
// Record this loop in the table, if there's room.
assert(optLoopCount <= MAX_LOOP_NUM);
if (optLoopCount == MAX_LOOP_NUM)
{
#if COUNT_LOOPS
loopOverflowThisMethod = true;
#endif
return false;
}
// Assumed preconditions on the loop we're adding.
assert(first->bbNum <= top->bbNum);
assert(top->bbNum <= entry->bbNum);
assert(entry->bbNum <= bottom->bbNum);
assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
unsigned char loopInd = optLoopCount;
if (optLoopTable == nullptr)
{
assert(loopInd == 0);
optLoopTable = getAllocator(CMK_LoopOpt).allocate<LoopDsc>(MAX_LOOP_NUM);
}
else
{
// If the new loop contains any existing ones, add it in the right place.
for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
{
unsigned char prev = prevPlus1 - 1;
if (optLoopTable[prev].lpContainedBy(first, bottom))
{
loopInd = prev;
}
}
// Move up any loops if necessary.
for (unsigned j = optLoopCount; j > loopInd; j--)
{
optLoopTable[j] = optLoopTable[j - 1];
}
}
#ifdef DEBUG
for (unsigned i = loopInd + 1; i < optLoopCount; i++)
{
// The loop is well-formed.
assert(optLoopTable[i].lpWellFormed());
// Check for disjoint.
if (optLoopTable[i].lpDisjoint(first, bottom))
{
continue;
}
// Otherwise, assert complete containment (of optLoopTable[i] in new loop).
assert(optLoopTable[i].lpContainedBy(first, bottom));
}
#endif // DEBUG
optLoopTable[loopInd].lpHead = head;
optLoopTable[loopInd].lpFirst = first;
optLoopTable[loopInd].lpTop = top;
optLoopTable[loopInd].lpBottom = bottom;
optLoopTable[loopInd].lpEntry = entry;
optLoopTable[loopInd].lpExit = exit;
optLoopTable[loopInd].lpExitCnt = exitCnt;
optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
optLoopTable[loopInd].lpAsgVars = AllVarSetOps::UninitVal();
optLoopTable[loopInd].lpFlags = 0;
// We haven't yet recorded any side effects.
for (MemoryKind memoryKind : allMemoryKinds())
{
optLoopTable[loopInd].lpLoopHasMemoryHavoc[memoryKind] = false;
}
optLoopTable[loopInd].lpFieldsModified = nullptr;
optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
// If DO-WHILE loop mark it as such.
if (head->bbNext == entry)
{
optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
}
// If single exit loop mark it as such.
if (exitCnt == 1)
{
noway_assert(exit);
optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
}
//
// Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
// We have the following restrictions:
// 1. The loop condition must be a simple one i.e. only one JTRUE node
// 2. There must be a loop iterator (a local var) that is
// incremented (decremented or lsh, rsh, mul) with a constant value
// 3. The iterator is incremented exactly once
// 4. The loop condition must use the iterator.
//
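// For example (illustrative), a loop equivalent to
// "for (i = 0; i < a.Length; i++) { ... }" satisfies all four
// restrictions: one JTRUE, a local iterator bumped exactly once by a
// constant, and a loop condition that uses that iterator.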
if (bottom->bbJumpKind == BBJ_COND)
{
GenTree* init;
GenTree* test;
GenTree* incr;
if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
{
goto DONE_LOOP;
}
unsigned iterVar = BAD_VAR_NUM;
if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
{
goto DONE_LOOP;
}
// Make sure the "iterVar" initialization is never skipped,
// i.e. every pred of ENTRY other than HEAD is in the loop.
for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
{
BasicBlock* predBlock = predEdge->flBlock;
if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
{
goto DONE_LOOP;
}
}
if (!optPopulateInitInfo(loopInd, init, iterVar))
{
goto DONE_LOOP;
}
// Check that the iterator is used in the loop condition.
if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
{
goto DONE_LOOP;
}
// We know the loop has an iterator at this point -> flag it as LPFLG_ITER
// Record the iterator, the pointer to the test node
// and the initial value of the iterator (constant or local var)
optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
// Record iterator.
optLoopTable[loopInd].lpIterTree = incr;
#if COUNT_LOOPS
// Save the initial value of the iterator - can be lclVar or constant
// Flag the loop accordingly.
iterLoopCount++;
#endif
#if COUNT_LOOPS
simpleTestLoopCount++;
#endif
// Check if a constant iteration loop.
if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
{
// This is a constant loop.
optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
#if COUNT_LOOPS
constIterLoopCount++;
#endif
}
#ifdef DEBUG
if (verbose && 0)
{
printf("\nConstant loop initializer:\n");
gtDispTree(init);
printf("\nConstant loop body:\n");
BasicBlock* block = head;
do
{
block = block->bbNext;
for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
{
if (stmt->gtStmt.gtStmtExpr == incr)
{
break;
}
printf("\n");
gtDispTree(stmt->gtStmt.gtStmtExpr);
}
} while (block != bottom);
}
#endif // DEBUG
}
DONE_LOOP:
DBEXEC(verbose, optPrintLoopRecording(loopInd));
optLoopCount++;
return true;
}
#ifdef DEBUG
//------------------------------------------------------------------------
// optPrintLoopRecording: Print a recording of the loop.
//
// Arguments:
// loopInd - loop index.
//
void Compiler::optPrintLoopRecording(unsigned loopInd)
{
printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
optLoopTable[loopInd].lpExit);
// If an iterator loop, print the iterator and the initialization.
if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
{
printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
printf(" (");
printf(GenTree::OpName(optLoopTable[loopInd].lpIterOper()));
printf(" ");
printf("%d )", optLoopTable[loopInd].lpIterConst());
if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
{
printf(" from %d", optLoopTable[loopInd].lpConstInit);
}
if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
{
printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
}
// If a simple test condition, print the operator and the limits.
printf(GenTree::OpName(optLoopTable[loopInd].lpTestOper()));
if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
{
printf("%d ", optLoopTable[loopInd].lpConstLimit());
}
if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
{
printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
}
printf("]");
}
printf("\n");
}
void Compiler::optCheckPreds()
{
BasicBlock* block;
BasicBlock* blockPred;
flowList* pred;
for (block = fgFirstBB; block; block = block->bbNext)
{
for (pred = block->bbPreds; pred; pred = pred->flNext)
{
// make sure this pred is part of the BB list
for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
{
if (blockPred == pred->flBlock)
{
break;
}
}
noway_assert(blockPred);
switch (blockPred->bbJumpKind)
{
case BBJ_COND:
if (blockPred->bbJumpDest == block)
{
break;
}
__fallthrough;
case BBJ_NONE:
noway_assert(blockPred->bbNext == block);
break;
case BBJ_EHFILTERRET:
case BBJ_ALWAYS:
case BBJ_EHCATCHRET:
noway_assert(blockPred->bbJumpDest == block);
break;
default:
break;
}
}
}
}
#endif // DEBUG
namespace
{
//------------------------------------------------------------------------
// LoopSearch: Class that handles scanning a range of blocks to detect a loop,
// moving blocks to make the loop body contiguous, and recording
// the loop.
//
// We will use the following terminology:
// HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
// Not part of the loop itself.
// FIRST - the lexically first basic block (in bbNext order) within this loop.
// TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
// BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
// EXIT - the predecessor of the loop's unique exit edge, if it has a unique exit edge; else nullptr
// ENTRY - the entry point into the loop (not necessarily the TOP), but there must be only one entry
//
// We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
// When the loop is identified, blocks will be moved out to make it a compact contiguous region if possible,
// and in cases where compaction is not possible, we'll subsequently treat all blocks in the lexical range
// between TOP and BOTTOM as part of the loop even if they aren't part of the SCC.
// Regarding nesting: Since a given block can only have one back-edge (we only detect loops with back-edges
// from BBJ_COND or BBJ_ALWAYS blocks), no two loops will share the same BOTTOM. Two loops may share the
// same FIRST/TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write
// the CFG so that no two loops share the same FIRST/TOP/ENTRY anymore.
//
// |
// v
// head
// |
// | top/first <--+
// | | |
// | ... |
// | | |
// | v |
// +---> entry |
// | |
// ... |
// | |
// v |
// +-- exit/tail |
// | | |
// | ... |
// | | |
// | v |
// | bottom ---+
// |
// +------+
// |
// v
//
class LoopSearch
{
// Keeping track of which blocks are in the loop requires two block sets since we may add blocks
// as we go but the BlockSet type's max ID doesn't increase to accommodate them. Define a helper
// struct to make the ensuing code more readable.
struct LoopBlockSet
{
private:
// Keep track of blocks with bbNum <= oldBlockMaxNum in a regular BlockSet, since
// it can hold all of them.
BlockSet oldBlocksInLoop; // Blocks with bbNum <= oldBlockMaxNum
// Keep track of blocks with bbNum > oldBlockMaxNum in a separate BlockSet, but
// indexing them by (blockNum - oldBlockMaxNum); since we won't generate more than
// one new block per old block, this must be sufficient to track any new blocks.
BlockSet newBlocksInLoop; // Blocks with bbNum > oldBlockMaxNum
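// For example (illustrative numbers): with oldBlockMaxNum == 50, a
// block created during compaction with bbNum 53 is tracked as bit 3
// (53 - 50) of newBlocksInLoop, while BB07 lives in oldBlocksInLoop.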
Compiler* comp;
unsigned int oldBlockMaxNum;
public:
LoopBlockSet(Compiler* comp)
: oldBlocksInLoop(BlockSetOps::UninitVal())
, newBlocksInLoop(BlockSetOps::UninitVal())
, comp(comp)
, oldBlockMaxNum(comp->fgBBNumMax)
{
}
void Reset(unsigned int seedBlockNum)
{
if (BlockSetOps::MayBeUninit(oldBlocksInLoop))
{
// Either the block sets are uninitialized (and long), so we need to initialize
// them (and allocate their backing storage), or they are short and empty, so
// assigning MakeEmpty to them is as cheap as ClearD.
oldBlocksInLoop = BlockSetOps::MakeEmpty(comp);
newBlocksInLoop = BlockSetOps::MakeEmpty(comp);
}
else
{
// We know the backing storage is already allocated, so just clear it.
BlockSetOps::ClearD(comp, oldBlocksInLoop);
BlockSetOps::ClearD(comp, newBlocksInLoop);
}
assert(seedBlockNum <= oldBlockMaxNum);
BlockSetOps::AddElemD(comp, oldBlocksInLoop, seedBlockNum);
}
bool CanRepresent(unsigned int blockNum)
{
// We can represent old blocks up to oldBlockMaxNum, and
// new blocks up to 2 * oldBlockMaxNum.
return (blockNum <= 2 * oldBlockMaxNum);
}
bool IsMember(unsigned int blockNum)
{
if (blockNum > oldBlockMaxNum)
{
return BlockSetOps::IsMember(comp, newBlocksInLoop, blockNum - oldBlockMaxNum);
}
return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum);
}
void Insert(unsigned int blockNum)
{
if (blockNum > oldBlockMaxNum)
{
BlockSetOps::AddElemD(comp, newBlocksInLoop, blockNum - oldBlockMaxNum);
}
else
{
BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum);
}
}
bool TestAndInsert(unsigned int blockNum)
{
if (blockNum > oldBlockMaxNum)
{
unsigned int shiftedNum = blockNum - oldBlockMaxNum;
if (!BlockSetOps::IsMember(comp, newBlocksInLoop, shiftedNum))
{
BlockSetOps::AddElemD(comp, newBlocksInLoop, shiftedNum);
return false;
}
}
else
{
if (!BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum))
{
BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum);
return false;
}
}
return true;
}
};
LoopBlockSet loopBlocks; // Set of blocks identified as part of the loop
Compiler* comp;
// See LoopSearch class comment header for a diagram relating these fields:
BasicBlock* head; // Predecessor of unique entry edge
BasicBlock* first; // Lexically first in-loop block
BasicBlock* top; // Successor of back-edge from BOTTOM
BasicBlock* bottom; // Predecessor of back-edge to TOP, also lexically last in-loop block
BasicBlock* entry; // Successor of unique entry edge
BasicBlock* lastExit; // Most recently discovered exit block
unsigned char exitCount; // Number of discovered exit edges
unsigned int oldBlockMaxNum; // Used to identify new blocks created during compaction
BlockSet bottomBlocks; // BOTTOM blocks of already-recorded loops
#ifdef DEBUG
bool forgotExit = false; // Flags a rare case where lastExit gets nulled out, for assertions
#endif
bool changedFlowGraph = false; // Signals that loop compaction has modified the flow graph
public:
LoopSearch(Compiler* comp)
: loopBlocks(comp), comp(comp), oldBlockMaxNum(comp->fgBBNumMax), bottomBlocks(BlockSetOps::MakeEmpty(comp))
{
// Make sure we've renumbered such that the bitsets can hold all the bits
assert(comp->fgBBNumMax <= comp->fgCurBBEpochSize);
}
//------------------------------------------------------------------------
// RecordLoop: Notify the Compiler that a loop has been found.
//
// Return Value:
// true - Loop successfully recorded.
// false - Compiler has run out of loop descriptors; loop not recorded.
//
bool RecordLoop()
{
/* At this point we have a compact loop - record it in the loop table
* If we found only one exit, record it in the table too
* (otherwise an exit = nullptr in the loop table means multiple exits) */
BasicBlock* onlyExit = (exitCount == 1 ? lastExit : nullptr);
if (comp->optRecordLoop(head, first, top, entry, bottom, onlyExit, exitCount))
{
// Record the BOTTOM block for future reference before returning.
assert(bottom->bbNum <= oldBlockMaxNum);
BlockSetOps::AddElemD(comp, bottomBlocks, bottom->bbNum);
return true;
}
// Unable to record this loop because the loop descriptor table overflowed.
return false;
}
//------------------------------------------------------------------------
// ChangedFlowGraph: Determine whether loop compaction has modified the flow graph.
//
// Return Value:
// true - The flow graph has been modified; fgUpdateChangedFlowGraph should
// be called (which is the caller's responsibility).
// false - The flow graph has not been modified by this LoopSearch.
//
bool ChangedFlowGraph()
{
return changedFlowGraph;
}
//------------------------------------------------------------------------
// FindLoop: Search for a loop with the given HEAD block and back-edge.
//
// Arguments:
// head - Block to be the HEAD of any loop identified
// top - Block to be the TOP of any loop identified
// bottom - Block to be the BOTTOM of any loop identified
//
// Return Value:
// true - Found a valid loop.
// false - Did not find a valid loop.
//
// Notes:
// May modify flow graph to make loop compact before returning.
// Will set instance fields to track loop's extent and exits if a valid
// loop is found, and potentially trash them otherwise.
//
bool FindLoop(BasicBlock* head, BasicBlock* top, BasicBlock* bottom)
{
/* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
* to TOP (note that this is an abuse of notation since this is not necessarily a back edge
* as the definition says, but merely an indication that we have a loop there).
* Thus, we have to be very careful and after entry discovery check that it is indeed
* the only place we enter the loop (especially for non-reducible flow graphs).
*/
if (top->bbNum > bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
{
// Edge from BOTTOM to TOP is not a backward edge
return false;
}
if (bottom->bbNum > oldBlockMaxNum)
{
// Not a true back-edge; bottom is a block added to reconnect fall-through during
// loop processing, so its block number does not reflect its position.
return false;
}
if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
(bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
(bottom->bbJumpKind == BBJ_SWITCH))
{
/* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
* BBJ_SWITCH that has a backward jump appears only for labeled break. */
return false;
}
/* The presence of a "back edge" is an indication that a loop might be present here
*
* LOOP:
* 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
* node in the loop to any other node in the loop (wholly within the loop)
* 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
* in the loop from outside the loop, and that is through the ENTRY
*/
/* Let's find the loop ENTRY */
BasicBlock* entry = FindEntry(head, top, bottom);
if (entry == nullptr)
{
// For now, we only recognize loops where HEAD has some successor ENTRY in the loop.
return false;
}
// Passed the basic checks; initialize instance state for this back-edge.
this->head = head;
this->top = top;
this->entry = entry;
this->bottom = bottom;
this->lastExit = nullptr;
this->exitCount = 0;
// Now we find the "first" block -- the earliest block reachable within the loop.
// With our current algorithm, this is always the same as "top".
this->first = top;
if (!HasSingleEntryCycle())
{
// There isn't actually a loop between TOP and BOTTOM
return false;
}
if (!loopBlocks.IsMember(top->bbNum))
{
// The "back-edge" we identified isn't actually part of the flow cycle containing ENTRY
return false;
}
// Disqualify loops where the first block of the loop is less nested in EH than
// the bottom block. That is, we don't want to handle loops where the back edge
// goes from within an EH region to a first block that is outside that same EH
// region. Note that we *do* handle loops where the first block is the *first*
// block of a more nested EH region (since it is legal to branch to the first
// block of an immediately more nested EH region). So, for example, disqualify
// this:
//
// BB02
// ...
// try {
// ...
// BB10 BBJ_COND => BB02
// ...
// }
//
// Here, BB10 is more nested than BB02.
if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), first))
{
JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
"loop.\n",
first->bbNum, bottom->bbNum);
return false;
}
#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
// Disqualify loops where the first block of the loop is a finally target.
// The main problem is when multiple loops share a 'first' block that is a finally
// target and we canonicalize the loops by adding a new loop head. In that case, we
// need to update the blocks so the finally target bit is moved to the newly created
// block, and removed from the old 'first' block. This is 'hard', so at this point
// in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
// long-term), it's easier to disallow the loop than to update the flow graph to
// support this case.
if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
{
JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
return false;
}
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
// Compact the loop (sweep through it and move out any blocks that aren't part of the
// flow cycle), and find the exits.
if (!MakeCompactAndFindExits())
{
// Unable to preserve well-formed loop during compaction.
return false;
}
// We have a valid loop.
return true;
}
private:
//------------------------------------------------------------------------
// FindEntry: See if given HEAD flows to valid ENTRY between given TOP and BOTTOM
//
// Arguments:
// head - Block to be the HEAD of any loop identified
// top - Block to be the TOP of any loop identified
// bottom - Block to be the BOTTOM of any loop identified
//
// Return Value:
// Block to be the ENTRY of any loop identified, or nullptr if no
// such entry meeting our criteria can be found.
//
// Notes:
// Returns main entry if one is found, does not check for side-entries.
//
BasicBlock* FindEntry(BasicBlock* head, BasicBlock* top, BasicBlock* bottom)
{
if (head->bbJumpKind == BBJ_ALWAYS)
{
if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
{
/* OK - we enter somewhere within the loop */
/* some useful asserts
* Cannot enter at the top - should have been caught by redundant jumps */
assert((head->bbJumpDest != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
return head->bbJumpDest;
}
else
{
/* special case - don't consider now */
// assert (!"Loop entered in weird way!");
return nullptr;
}
}
// Can we fall through into the loop?
else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
{
/* The ENTRY is at the TOP (a do-while loop) */
return top;
}
else
{
return nullptr; // head does not flow into the loop; bail for now
}
}
//------------------------------------------------------------------------
// HasSingleEntryCycle: Perform a reverse flow walk from ENTRY, visiting
// only blocks between TOP and BOTTOM, to determine if such a cycle
// exists and if it has a single entry.
//
// Return Value:
// true - Found a single-entry cycle.
// false - Did not find a single-entry cycle.
//
// Notes:
// Will mark (in `loopBlocks`) all blocks found to participate in the
// cycle.
//
bool HasSingleEntryCycle()
{
// Now do a backwards flow walk from entry to see if we have a single-entry loop
bool foundCycle = false;
// Seed the loop block set and worklist with the entry block.
loopBlocks.Reset(entry->bbNum);
jitstd::list<BasicBlock*> worklist(comp->getAllocator());
worklist.push_back(entry);
while (!worklist.empty())
{
BasicBlock* block = worklist.back();
worklist.pop_back();
/* Make sure ENTRY dominates all blocks in the loop
* This is necessary to ensure condition 2. above
*/
if (block->bbNum > oldBlockMaxNum)
{
// This is a new block we added to connect fall-through, so the
// recorded dominator information doesn't cover it. Just continue,
// and when we process its unique predecessor we'll abort if ENTRY
// doesn't dominate that.
}
else if (!comp->fgDominate(entry, block))
{
return false;
}
// Add preds to the worklist, checking for side-entries.
for (flowList* predIter = block->bbPreds; predIter != nullptr; predIter = predIter->flNext)
{
BasicBlock* pred = predIter->flBlock;
unsigned int testNum = PositionNum(pred);
if ((testNum < top->bbNum) || (testNum > bottom->bbNum))
{
// Pred is out of loop range
if (block == entry)
{
if (pred == head)
{
// This is the single entry we expect.
continue;
}
// ENTRY has some pred other than head outside the loop. If ENTRY does not
// dominate this pred, we'll consider this a side-entry and skip this loop;
// otherwise the loop is still valid and this may be a (flow-wise) back-edge
// of an outer loop. For the dominance test, if `pred` is a new block, use
// its unique predecessor since the dominator tree has info for that.
BasicBlock* effectivePred = (pred->bbNum > oldBlockMaxNum ? pred->bbPrev : pred);
if (comp->fgDominate(entry, effectivePred))
{
// Outer loop back-edge
continue;
}
}
// There are multiple entries to this loop, don't consider it.
return false;
}
bool isFirstVisit;
if (pred == entry)
{
// We have indeed found a cycle in the flow graph.
isFirstVisit = !foundCycle;
foundCycle = true;
assert(loopBlocks.IsMember(pred->bbNum));
}
else if (loopBlocks.TestAndInsert(pred->bbNum))
{
// Already visited this pred
isFirstVisit = false;
}
else
{
// Add this pred to the worklist
worklist.push_back(pred);
isFirstVisit = true;
}
if (isFirstVisit && (pred->bbNext != nullptr) && (PositionNum(pred->bbNext) == pred->bbNum))
{
// We've created a new block immediately after `pred` to
// reconnect what was fall-through. Mark it as in-loop also;
// it needs to stay with `pred` and if it exits the loop we'd
// just need to re-create it if we tried to move it out.
loopBlocks.Insert(pred->bbNext->bbNum);
}
}
}
return foundCycle;
}
//------------------------------------------------------------------------
// PositionNum: Get the number identifying a block's position per the
// lexical ordering that existed before searching for (and compacting)
// loops.
//
// Arguments:
// block - Block whose position is desired.
//
// Return Value:
// A number indicating that block's position relative to others.
//
// Notes:
// When the given block is a new one created during loop compaction,
// the number of its unique predecessor is returned.
//
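// For example (illustrative numbers): a compaction-created block with
// bbNum 57 (> oldBlockMaxNum) whose unique predecessor is BB12 reports
// position 12, placing it lexically with the block it follows.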
unsigned int PositionNum(BasicBlock* block)
{
if (block->bbNum > oldBlockMaxNum)
{
// This must be a block we inserted to connect fall-through after moving blocks.
// To determine if it's in the loop or not, use the number of its unique predecessor
// block.
assert(block->bbPreds->flBlock == block->bbPrev);
assert(block->bbPreds->flNext == nullptr);
return block->bbPrev->bbNum;
}
return block->bbNum;
}
//------------------------------------------------------------------------
// MakeCompactAndFindExits: Compact the loop (sweep through it and move out
// any blocks that aren't part of the flow cycle), and find the exits (set
// lastExit and exitCount).
//
// Return Value:
// true - Loop successfully compacted (or `loopBlocks` expanded to
// include all blocks in the lexical range), exits enumerated.
// false - Loop cannot be made compact and remain well-formed.
//
bool MakeCompactAndFindExits()
{
// Compaction (if it needs to happen) will require an insertion point.
BasicBlock* moveAfter = nullptr;
for (BasicBlock* previous = top->bbPrev; previous != bottom;)
{
BasicBlock* block = previous->bbNext;
if (loopBlocks.IsMember(block->bbNum))
{
// This block is a member of the loop. Check to see if it may exit the loop.
CheckForExit(block);
// Done processing this block; move on to the next.
previous = block;
continue;
}
// This block is lexically between TOP and BOTTOM, but it does not
// participate in the flow cycle. Check for a run of consecutive
// such blocks.
BasicBlock* lastNonLoopBlock = block;
BasicBlock* nextLoopBlock = block->bbNext;
while (!loopBlocks.IsMember(nextLoopBlock->bbNum))
{
lastNonLoopBlock = nextLoopBlock;
nextLoopBlock = nextLoopBlock->bbNext;
// This loop must terminate because we know BOTTOM is in loopBlocks.
}
// Choose an insertion point for non-loop blocks if we haven't yet done so.
if (moveAfter == nullptr)
{
moveAfter = FindInsertionPoint();
}
if (!BasicBlock::sameEHRegion(previous, nextLoopBlock) || !BasicBlock::sameEHRegion(previous, moveAfter))
{
// EH regions would be ill-formed if we moved these blocks out.
// See if we can consider them loop blocks without introducing
// a side-entry.
if (CanTreatAsLoopBlocks(block, lastNonLoopBlock))
{
// The call to `CanTreatAsLoopBlocks` marked these blocks as part of the loop;
// iterate without updating `previous` so that we'll analyze them as part
// of the loop.
continue;
}
else
{
// We can't move these out of the loop or leave them in, so just give
// up on this loop.
return false;
}
}
// Now physically move the blocks.
BasicBlock* moveBefore = moveAfter->bbNext;
comp->fgUnlinkRange(block, lastNonLoopBlock);
comp->fgMoveBlocksAfter(block, lastNonLoopBlock, moveAfter);
comp->ehUpdateLastBlocks(moveAfter, lastNonLoopBlock);
// Apply any adjustments needed for fallthrough at the boundaries of the moved region.
FixupFallThrough(moveAfter, moveBefore, block);
FixupFallThrough(lastNonLoopBlock, nextLoopBlock, moveBefore);
// Also apply any adjustments needed where the blocks were snipped out of the loop.
BasicBlock* newBlock = FixupFallThrough(previous, block, nextLoopBlock);
if (newBlock != nullptr)
{
// This new block is in the loop and is a loop exit.
loopBlocks.Insert(newBlock->bbNum);
lastExit = newBlock;
++exitCount;
}
// Update moveAfter for the next insertion.
moveAfter = lastNonLoopBlock;
// Note that we've changed the flow graph, and continue without updating
// `previous` so that we'll process nextLoopBlock.
changedFlowGraph = true;
}
if ((exitCount == 1) && (lastExit == nullptr))
{
// If we happen to have a loop with two exits, one of which goes to an
// infinite loop that's lexically nested inside it, where the inner loop
// can't be moved out, we can end up in this situation (because
// CanTreatAsLoopBlocks will have decremented the count expecting to find
// another exit later). Bump the exit count to 2, since downstream code
// will not be prepared for null lastExit with exitCount of 1.
assert(forgotExit);
exitCount = 2;
}
// Loop compaction was successful
return true;
}
//------------------------------------------------------------------------
// FindInsertionPoint: Find an appropriate spot to which blocks that are
// lexically between TOP and BOTTOM but not part of the flow cycle
// can be moved.
//
// Return Value:
// Block after which to insert moved blocks.
//
BasicBlock* FindInsertionPoint()
{
// Find an insertion point for blocks we're going to move. Move them down
// out of the loop, and if possible find a spot that won't break up fall-through.
BasicBlock* moveAfter = bottom;
while (moveAfter->bbFallsThrough())
{
// Keep looking for a better insertion point if we can.
BasicBlock* newMoveAfter = TryAdvanceInsertionPoint(moveAfter);
if (newMoveAfter == nullptr)
{
// Ran out of candidate insertion points, so just split up the fall-through.
return moveAfter;
}
moveAfter = newMoveAfter;
}
return moveAfter;
}
//------------------------------------------------------------------------
// TryAdvanceInsertionPoint: Find the next legal insertion point after
// the given one, if one exists.
//
// Arguments:
// oldMoveAfter - Prior insertion point; find the next after this.
//
// Return Value:
// The next block after `oldMoveAfter` that is a legal insertion point
// (i.e. blocks being swept out of the loop can be moved immediately
// after it), if one exists, else nullptr.
//
BasicBlock* TryAdvanceInsertionPoint(BasicBlock* oldMoveAfter)
{
BasicBlock* newMoveAfter = oldMoveAfter->bbNext;
if (!BasicBlock::sameEHRegion(oldMoveAfter, newMoveAfter))
{
// Don't cross an EH region boundary.
return nullptr;
}
if ((newMoveAfter->bbJumpKind == BBJ_ALWAYS) || (newMoveAfter->bbJumpKind == BBJ_COND))
{
unsigned int destNum = newMoveAfter->bbJumpDest->bbNum;
if ((destNum >= top->bbNum) && (destNum <= bottom->bbNum) && !loopBlocks.IsMember(destNum))
{
// Reversing this branch out of block `newMoveAfter` could confuse this algorithm
// (in particular, the edge would still be numerically backwards but no longer be
// lexically backwards, so a lexical forward walk from TOP would not find BOTTOM),
// so don't do that.
// We're checking for BBJ_ALWAYS and BBJ_COND only here -- we don't need to
// check for BBJ_SWITCH because we'd never consider it a loop back-edge.
return nullptr;
}
}
// Similarly check to see if advancing to `newMoveAfter` would reverse the lexical order
// of an edge from the run of blocks being moved to `newMoveAfter` -- doing so would
// introduce a new lexical back-edge, which could (maybe?) confuse the loop search
// algorithm, and isn't desirable layout anyway.
for (flowList* predIter = newMoveAfter->bbPreds; predIter != nullptr; predIter = predIter->flNext)
{
unsigned int predNum = predIter->flBlock->bbNum;
if ((predNum >= top->bbNum) && (predNum <= bottom->bbNum) && !loopBlocks.IsMember(predNum))
{
// Don't make this forward edge a backwards edge.
return nullptr;
}
}
if (IsRecordedBottom(newMoveAfter))
{
// This is the BOTTOM of another loop; don't move any blocks past it, to avoid moving them
// out of that loop (we should have already done so when processing that loop if it were legal).
return nullptr;
}
// Advancing the insertion point is ok, except that we can't split up any CallFinally/BBJ_ALWAYS
// pair, so if we've got such a pair recurse to see if we can move past the whole thing.
return (newMoveAfter->isBBCallAlwaysPair() ? TryAdvanceInsertionPoint(newMoveAfter) : newMoveAfter);
}
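    // Illustrative sketch (block numbers hypothetical): with TOP == BB03 and BOTTOM == BB09,
    // suppose non-loop blocks BB05..BB06 are being swept out and the candidate insertion
    // point BB10 ends in "BBJ_COND => BB05". After the move, the edge BB10 -> BB05 would be
    // numerically backward but lexically forward -- exactly the confusion the first check
    // above guards against -- so TryAdvanceInsertionPoint would return nullptr for BB10.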
//------------------------------------------------------------------------
    // IsRecordedBottom: Determine if the given block is the BOTTOM of a previously
    //    recorded loop.
    //
    // Arguments:
    //    block - Block to check for BOTTOM-ness.
    //
    // Return Value:
    //    true - The block was recorded as `bottom` of some earlier-processed loop.
// false - No loops yet recorded have this block as their `bottom`.
//
bool IsRecordedBottom(BasicBlock* block)
{
if (block->bbNum > oldBlockMaxNum)
{
// This is a new block, which can't be an outer bottom block because we only allow old blocks
// as BOTTOM.
return false;
}
return BlockSetOps::IsMember(comp, bottomBlocks, block->bbNum);
}
//------------------------------------------------------------------------
    // CanTreatAsLoopBlocks: If the given range of blocks can be treated as
    //    loop blocks, add them to `loopBlocks` and return true. Otherwise,
    //    return false.
//
// Arguments:
// firstNonLoopBlock - First block in the run to be subsumed.
// lastNonLoopBlock - Last block in the run to be subsumed.
//
// Return Value:
    //    true - The blocks from `firstNonLoopBlock` to `lastNonLoopBlock` were
    //       successfully added to `loopBlocks`.
    //    false - Treating the blocks from `firstNonLoopBlock` to `lastNonLoopBlock`
    //       as loop blocks would not be legal (it would induce a side-entry).
//
// Notes:
// `loopBlocks` may be modified even if `false` is returned.
// `exitCount` and `lastExit` may be modified if this process identifies
// in-loop edges that were previously counted as exits.
//
bool CanTreatAsLoopBlocks(BasicBlock* firstNonLoopBlock, BasicBlock* lastNonLoopBlock)
{
BasicBlock* nextLoopBlock = lastNonLoopBlock->bbNext;
for (BasicBlock* testBlock = firstNonLoopBlock; testBlock != nextLoopBlock; testBlock = testBlock->bbNext)
{
for (flowList* predIter = testBlock->bbPreds; predIter != nullptr; predIter = predIter->flNext)
{
BasicBlock* testPred = predIter->flBlock;
unsigned int predPosNum = PositionNum(testPred);
unsigned int firstNonLoopPosNum = PositionNum(firstNonLoopBlock);
unsigned int lastNonLoopPosNum = PositionNum(lastNonLoopBlock);
if (loopBlocks.IsMember(predPosNum) ||
((predPosNum >= firstNonLoopPosNum) && (predPosNum <= lastNonLoopPosNum)))
{
// This pred is in the loop (or what will be the loop if we determine this
// run of exit blocks doesn't include a side-entry).
if (predPosNum < firstNonLoopPosNum)
{
// We've already counted this block as an exit, so decrement the count.
--exitCount;
if (lastExit == testPred)
{
// Erase this now-bogus `lastExit` entry.
lastExit = nullptr;
INDEBUG(forgotExit = true);
}
}
}
else
{
// This pred is not in the loop, so this constitutes a side-entry.
return false;
}
}
// Either we're going to abort the loop on a subsequent testBlock, or this
// testBlock is part of the loop.
loopBlocks.Insert(testBlock->bbNum);
}
// All blocks were ok to leave in the loop.
return true;
}
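    // For example (hypothetical numbering): if the candidate run is BB05..BB06 and BB05
    // has a pred BB20 outside both `loopBlocks` and the run, then BB20 is a side-entry and
    // the run cannot be subsumed. If instead BB05's only preds are BB04 (a loop block that
    // was previously counted as exiting to BB05) and BB06, the stale exit is un-counted
    // and both blocks are inserted into `loopBlocks`.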
//------------------------------------------------------------------------
// FixupFallThrough: Re-establish any broken control flow connectivity
// and eliminate any "goto-next"s that were created by changing the
// given block's lexical follower.
//
// Arguments:
// block - Block whose `bbNext` has changed.
// oldNext - Previous value of `block->bbNext`.
// newNext - New value of `block->bbNext`.
//
// Return Value:
// If a new block is created to reconnect flow, the new block is
// returned; otherwise, nullptr.
//
BasicBlock* FixupFallThrough(BasicBlock* block, BasicBlock* oldNext, BasicBlock* newNext)
{
// If we create a new block, that will be our return value.
BasicBlock* newBlock = nullptr;
if (block->bbFallsThrough())
{
// Need to reconnect the flow from `block` to `oldNext`.
if ((block->bbJumpKind == BBJ_COND) && (block->bbJumpDest == newNext))
{
/* Reverse the jump condition */
GenTree* test = block->lastNode();
noway_assert(test->OperIsConditionalJump());
if (test->OperGet() == GT_JTRUE)
{
GenTree* cond = comp->gtReverseCond(test->gtOp.gtOp1);
assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node.
test->gtOp.gtOp1 = cond;
}
else
{
comp->gtReverseCond(test);
}
// Redirect the Conditional JUMP to go to `oldNext`
block->bbJumpDest = oldNext;
}
else
{
// Insert an unconditional jump to `oldNext` just after `block`.
newBlock = comp->fgConnectFallThrough(block, oldNext);
noway_assert((newBlock == nullptr) || loopBlocks.CanRepresent(newBlock->bbNum));
}
}
else if ((block->bbJumpKind == BBJ_ALWAYS) && (block->bbJumpDest == newNext))
{
// We've made `block`'s jump target its bbNext, so remove the jump.
if (!comp->fgOptimizeBranchToNext(block, newNext, block->bbPrev))
{
// If optimizing away the goto-next failed for some reason, mark it KEEP_BBJ_ALWAYS to
// prevent assertions from complaining about it.
block->bbFlags |= BBF_KEEP_BBJ_ALWAYS;
}
}
// Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS.
        assert(((block->bbJumpKind != BBJ_COND) && (block->bbJumpKind != BBJ_ALWAYS)) ||
               (block->bbJumpDest != newNext) || ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0));
return newBlock;
}
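    // For example (hypothetical shape): if `block` ended in "JTRUE(i < n) => newNext" and
    // previously fell through to `oldNext`, the BBJ_COND path above rewrites it to
    // "JTRUE(i >= n) => oldNext", so the fall-through to `newNext` preserves both of the
    // block's original successors without inserting a new block.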
//------------------------------------------------------------------------
// CheckForExit: Check if the given block has any successor edges that are
// loop exits, and update `lastExit` and `exitCount` if so.
//
// Arguments:
// block - Block whose successor edges are to be checked.
//
// Notes:
// If one block has multiple exiting successor edges, those are counted
// as multiple exits in `exitCount`.
//
void CheckForExit(BasicBlock* block)
{
BasicBlock* exitPoint;
switch (block->bbJumpKind)
{
case BBJ_COND:
case BBJ_CALLFINALLY:
case BBJ_ALWAYS:
case BBJ_EHCATCHRET:
assert(block->bbJumpDest);
exitPoint = block->bbJumpDest;
if (!loopBlocks.IsMember(exitPoint->bbNum))
{
/* exit from a block other than BOTTOM */
lastExit = block;
exitCount++;
}
break;
case BBJ_NONE:
break;
case BBJ_EHFINALLYRET:
case BBJ_EHFILTERRET:
/* The "try" associated with this "finally" must be in the
* same loop, so the finally block will return control inside the loop */
break;
case BBJ_THROW:
case BBJ_RETURN:
/* those are exits from the loop */
lastExit = block;
exitCount++;
break;
case BBJ_SWITCH:
unsigned jumpCnt;
jumpCnt = block->bbJumpSwt->bbsCount;
BasicBlock** jumpTab;
jumpTab = block->bbJumpSwt->bbsDstTab;
do
{
noway_assert(*jumpTab);
exitPoint = *jumpTab;
if (!loopBlocks.IsMember(exitPoint->bbNum))
{
lastExit = block;
exitCount++;
}
} while (++jumpTab, --jumpCnt);
break;
default:
noway_assert(!"Unexpected bbJumpKind");
break;
}
if (block->bbFallsThrough() && !loopBlocks.IsMember(block->bbNext->bbNum))
{
// Found a fall-through exit.
lastExit = block;
exitCount++;
}
}
};
}
/*****************************************************************************
* Find the natural loops, using dominators. Note that the test for
* a loop is slightly different from the standard one, because we have
* not done a depth first reordering of the basic blocks.
*/
void Compiler::optFindNaturalLoops()
{
#ifdef DEBUG
if (verbose)
{
printf("*************** In optFindNaturalLoops()\n");
}
#endif // DEBUG
noway_assert(fgDomsComputed);
assert(fgHasLoops);
#if COUNT_LOOPS
hasMethodLoops = false;
loopsThisMethod = 0;
loopOverflowThisMethod = false;
#endif
LoopSearch search(this);
for (BasicBlock* head = fgFirstBB; head->bbNext; head = head->bbNext)
{
BasicBlock* top = head->bbNext;
// Blocks that are rarely run have a zero bbWeight and should
// never be optimized here
if (top->bbWeight == BB_ZERO_WEIGHT)
{
continue;
}
for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
{
if (search.FindLoop(head, top, pred->flBlock))
{
// Found a loop; record it and see if we've hit the limit.
bool recordedLoop = search.RecordLoop();
                (void)recordedLoop; // avoid unused variable warnings in COUNT_LOOPS and !DEBUG
#if COUNT_LOOPS
if (!hasMethodLoops)
{
/* mark the method as containing natural loops */
totalLoopMethods++;
hasMethodLoops = true;
}
/* increment total number of loops found */
totalLoopCount++;
loopsThisMethod++;
/* keep track of the number of exits */
loopExitCountTable.record(static_cast<unsigned>(exitCount));
#else // COUNT_LOOPS
assert(recordedLoop);
if (optLoopCount == MAX_LOOP_NUM)
{
// We won't be able to record any more loops, so stop looking.
goto NO_MORE_LOOPS;
}
#endif // COUNT_LOOPS
// Continue searching preds of `top` to see if any other are
// back-edges (this can happen for nested loops). The iteration
// is safe because the compaction we do only modifies predecessor
// lists of blocks that gain or lose fall-through from their
// `bbPrev`, but since the motion is from within the loop to below
// it, we know we're not altering the relationship between `top`
// and its `bbPrev`.
}
}
}
NO_MORE_LOOPS:
#if COUNT_LOOPS
loopCountTable.record(loopsThisMethod);
if (maxLoopsPerMethod < loopsThisMethod)
{
maxLoopsPerMethod = loopsThisMethod;
}
if (loopOverflowThisMethod)
{
totalLoopOverflows++;
}
#endif // COUNT_LOOPS
bool mod = search.ChangedFlowGraph();
if (mod)
{
// Need to renumber blocks now since loop canonicalization
// depends on it; can defer the rest of fgUpdateChangedFlowGraph()
// until after canonicalizing loops. Dominator information is
// recorded in terms of block numbers, so flag it invalid.
fgDomsComputed = false;
fgRenumberBlocks();
}
// Now the loop indices are stable. We can figure out parent/child relationships
// (using table indices to name loops), and label blocks.
for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
{
for (unsigned char possibleParent = loopInd; possibleParent > 0;)
{
possibleParent--;
if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
{
optLoopTable[loopInd].lpParent = possibleParent;
optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
optLoopTable[possibleParent].lpChild = loopInd;
break;
}
}
}
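    // For example (hypothetical entries): with L00 spanning BB02..BB10 and L01 spanning
    // BB04..BB08, processing loopInd == 1 finds that possibleParent == 0 contains it, so
    // L01 gets lpParent == 0 and is pushed onto the front of L00's child list.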
// Now label the blocks with the innermost loop to which they belong. Since parents
// precede children in the table, doing the labeling for each loop in order will achieve
// this -- the innermost loop labeling will be done last.
for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
{
BasicBlock* first = optLoopTable[loopInd].lpFirst;
BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
{
blk->bbNatLoopNum = loopInd;
if (blk == bottom)
{
break;
}
assert(blk->bbNext != nullptr); // We should never reach nullptr.
}
}
// Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
// one, if necessary, for loops containing others that share a "top."
for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
{
// Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
{
continue;
}
// Otherwise...
if (optCanonicalizeLoopNest(loopInd))
{
mod = true;
}
}
if (mod)
{
fgUpdateChangedFlowGraph();
}
#ifdef DEBUG
if (verbose && optLoopCount > 0)
{
printf("\nFinal natural loop table:\n");
for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
{
optPrintLoopInfo(loopInd);
printf("\n");
}
}
#endif // DEBUG
}
void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
{
BasicBlock* newJumpDest = nullptr;
switch (blk->bbJumpKind)
{
case BBJ_THROW:
case BBJ_RETURN:
case BBJ_NONE:
case BBJ_EHFILTERRET:
case BBJ_EHFINALLYRET:
case BBJ_EHCATCHRET:
// These have no jump destination to update.
break;
case BBJ_ALWAYS:
case BBJ_LEAVE:
case BBJ_CALLFINALLY:
case BBJ_COND:
// All of these have a single jump destination to update.
if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
{
blk->bbJumpDest = newJumpDest;
}
break;
case BBJ_SWITCH:
{
bool redirected = false;
for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
{
if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
{
blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
redirected = true;
}
}
            // If any redirections happened, invalidate the switch table map for the switch.
if (redirected)
{
// Don't create a new map just to try to remove an entry.
BlockToSwitchDescMap* switchMap = GetSwitchDescMap(/* createIfNull */ false);
if (switchMap != nullptr)
{
switchMap->Remove(blk);
}
}
}
break;
default:
unreached();
}
}
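// For example, if `redirectMap` contains the entry BB05 -> BB30 (hypothetical numbers),
// a BBJ_COND block whose bbJumpDest is BB05 is retargeted to BB30, and any BB05 entries
// in a BBJ_SWITCH block's bbsDstTab are rewritten likewise, invalidating the cached
// switch-target map for that block.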
// TODO-Cleanup: This should be a static member of the BasicBlock class.
void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
{
assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
// copy the jump destination(s) from "from" to "to".
switch (to->bbJumpKind)
{
case BBJ_ALWAYS:
case BBJ_LEAVE:
case BBJ_CALLFINALLY:
case BBJ_COND:
// All of these have a single jump destination to update.
to->bbJumpDest = from->bbJumpDest;
break;
case BBJ_SWITCH:
{
to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
{
to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
}
}
break;
default:
break;
}
}
// Canonicalize the loop nest rooted at parent loop 'loopInd'.
// Returns 'true' if the flow graph is modified.
bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
{
bool modified = false;
    // Is the top of the current loop shared with a nested loop? If so, it needs canonicalization.
if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
{
if (optCanonicalizeLoop(loopInd))
{
modified = true;
}
}
for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
child = optLoopTable[child].lpSibling)
{
if (optCanonicalizeLoopNest(child))
{
modified = true;
}
}
return modified;
}
bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
{
// Is the top uniquely part of the current loop?
BasicBlock* t = optLoopTable[loopInd].lpTop;
if (t->bbNatLoopNum == loopInd)
{
return false;
}
JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
"canonicalize\n",
loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
// Otherwise, the top of this loop is also part of a nested loop.
//
// Insert a new unique top for this loop. We must be careful to put this new
// block in the correct EH region. Note that f->bbPrev might be in a different
// EH region. For example:
//
// try {
// ...
// BB07
// }
// BB08 // "first"
//
// In this case, first->bbPrev is BB07, which is in a different 'try' region.
// On the other hand, the first block of multiple loops might be the first
// block of a 'try' region that is completely contained in the multiple loops.
// for example:
//
// BB08 try { }
// ...
// BB10 BBJ_ALWAYS => BB08
// ...
// BB12 BBJ_ALWAYS => BB08
//
// Here, we have two loops, both with BB08 as the "first" block. Block BB08
// is a single-block "try" region. Neither loop "bottom" block is in the same
// "try" region as BB08. This is legal because you can jump to the first block
// of a try region. With EH normalization, no two "try" regions will share
// this block. In this case, we need to insert a new block for the outer loop
// in the same EH region as the branch from the "bottom":
//
// BB30 BBJ_NONE
// BB08 try { }
// ...
// BB10 BBJ_ALWAYS => BB08
// ...
// BB12 BBJ_ALWAYS => BB30
//
// Another possibility is that the "first" block of the loop nest can be the first block
// of a "try" region that also has other predecessors than those in the loop, or even in
// the "try" region (since blocks can target the first block of a "try" region). For example:
//
// BB08 try {
// ...
// BB10 BBJ_ALWAYS => BB08
// ...
// BB12 BBJ_ALWAYS => BB08
// BB13 }
// ...
// BB20 BBJ_ALWAYS => BB08
// ...
// BB25 BBJ_ALWAYS => BB08
//
// Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
// bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
// same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
// the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
// old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
// (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
// situation of branching to a non-first block of a 'try' region.
//
// We can also have a loop nest where the "first" block is outside of a "try" region
// and the back edges are inside a "try" region, for example:
//
// BB02 // "first"
// ...
// BB09 try { BBJ_COND => BB02
// ...
// BB15 BBJ_COND => BB02
// ...
// BB21 } // end of "try"
//
// In this case, both loop back edges were formed by "leave" instructions that were
// imported into branches that were later made conditional. In this case, we don't
// want to copy the EH region of the back edge, since that would create a block
// outside of and disjoint with the "try" region of the back edge. However, to
// simplify things, we disqualify this type of loop, so we should never see this here.
BasicBlock* h = optLoopTable[loopInd].lpHead;
BasicBlock* f = optLoopTable[loopInd].lpFirst;
BasicBlock* b = optLoopTable[loopInd].lpBottom;
// The loop must be entirely contained within a single handler region.
assert(BasicBlock::sameHndRegion(f, b));
// If the bottom block is in the same "try" region, then we extend the EH
// region. Otherwise, we add the new block outside the "try" region.
bool extendRegion = BasicBlock::sameTryRegion(f, b);
BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
if (!extendRegion)
{
// We need to set the EH region manually. Set it to be the same
// as the bottom block.
newT->copyEHRegion(b);
}
// The new block can reach the same set of blocks as the old one, but don't try to reflect
// that in its reachability set here -- creating the new block may have changed the BlockSet
// representation from short to long, and canonicalizing loops is immediately followed by
// a call to fgUpdateChangedFlowGraph which will recompute the reachability sets anyway.
// Redirect the "bottom" of the current loop to "newT".
BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
blockMap->Set(t, newT);
optRedirectBlock(b, blockMap);
// Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
    // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
// to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
// they might have been prevented from participating in the loop nest due to different EH nesting, or some
// other reason.
//
// Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
// multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
// This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
// edge of the blockMap, so nothing will happen.
bool firstPred = true;
for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
{
BasicBlock* topPredBlock = topPred->flBlock;
// Skip if topPredBlock is in the loop.
// Note that this uses block number to detect membership in the loop. We are adding blocks during
// canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
// outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
{
JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
"redirecting its bottom edge\n",
topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
continue;
}
JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
newT->bbNum);
optRedirectBlock(topPredBlock, blockMap);
        // When we have profile data, the 'newT' block will inherit topPredBlock's profile weight.
if (topPredBlock->hasProfileWeight())
{
// This corrects an issue when the topPredBlock has a profile based weight
//
if (firstPred)
{
JITDUMP("in optCanonicalizeLoop: block BB%02u will inheritWeight from BB%02u\n", newT->bbNum,
topPredBlock->bbNum);
newT->inheritWeight(topPredBlock);
firstPred = false;
}
else
{
JITDUMP("in optCanonicalizeLoop: block BB%02u will also contribute to the weight of BB%02u\n",
newT->bbNum, topPredBlock->bbNum);
BasicBlock::weight_t newWeight = newT->getBBWeight(this) + topPredBlock->getBBWeight(this);
newT->setBBWeight(newWeight);
}
}
}
assert(newT->bbNext == f);
if (f != t)
{
newT->bbJumpKind = BBJ_ALWAYS;
newT->bbJumpDest = t;
newT->bbTreeList = nullptr;
fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
}
// If it had been a do-while loop (top == entry), update entry, as well.
BasicBlock* origE = optLoopTable[loopInd].lpEntry;
if (optLoopTable[loopInd].lpTop == origE)
{
optLoopTable[loopInd].lpEntry = newT;
}
optLoopTable[loopInd].lpTop = newT;
optLoopTable[loopInd].lpFirst = newT;
newT->bbNatLoopNum = loopInd;
JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
dspPtr(newT), loopInd);
// Make sure the head block still goes to the entry...
if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
{
h->bbJumpKind = BBJ_ALWAYS;
h->bbJumpDest = optLoopTable[loopInd].lpEntry;
}
else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
{
BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
optLoopTable[loopInd].lpHead = h2;
h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
h2->bbTreeList = nullptr;
fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
}
// If any loops nested in "loopInd" have the same head and entry as "loopInd",
// it must be the case that they were do-while's (since "h" fell through to the entry).
// The new node "newT" becomes the head of such loops.
for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
childLoop = optLoopTable[childLoop].lpSibling)
{
if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
{
optUpdateLoopHead(childLoop, h, newT);
}
}
return true;
}
bool Compiler::optLoopContains(unsigned l1, unsigned l2)
{
assert(l1 != BasicBlock::NOT_IN_LOOP);
if (l1 == l2)
{
return true;
}
else if (l2 == BasicBlock::NOT_IN_LOOP)
{
return false;
}
else
{
return optLoopContains(l1, optLoopTable[l2].lpParent);
}
}
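// For example, with lpParent chain 2 -> 1 -> 0 (loop 0 outermost), optLoopContains(0, 2)
// recurses through optLoopContains(0, 1) to optLoopContains(0, 0) and returns true, while
// optLoopContains(1, 0) steps to l2 == NOT_IN_LOOP and returns false.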
void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
{
assert(optLoopTable[loopInd].lpHead == from);
optLoopTable[loopInd].lpHead = to;
for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
childLoop = optLoopTable[childLoop].lpSibling)
{
if (optLoopTable[childLoop].lpHead == from)
{
optUpdateLoopHead(childLoop, from, to);
}
}
}
/*****************************************************************************
 *  Check if "i += const" will cause an overflow exception for the small types.
*/
bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
{
int type_MAX;
switch (incrType)
{
case TYP_BYTE:
type_MAX = SCHAR_MAX;
break;
case TYP_UBYTE:
type_MAX = UCHAR_MAX;
break;
case TYP_SHORT:
type_MAX = SHRT_MAX;
break;
case TYP_USHORT:
type_MAX = USHRT_MAX;
break;
case TYP_UINT: // Detected by checking for 32bit ....
case TYP_INT:
return false; // ... overflow same as done for TYP_INT
default:
NO_WAY("Bad type");
}
if (iterAtExit > type_MAX)
{
return true;
}
else
{
return false;
}
}
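// For example, a TYP_BYTE iterator that would exit the loop with iterAtExit == 130
// exceeds SCHAR_MAX (127), so the final increment would actually have wrapped; this
// returns true and the caller declines to treat the trip count as a simple constant.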
/*****************************************************************************
* If the "i -= const" will cause an underflow exception for the small types
*/
bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
{
int type_MIN;
switch (decrType)
{
case TYP_BYTE:
type_MIN = SCHAR_MIN;
break;
case TYP_SHORT:
type_MIN = SHRT_MIN;
break;
case TYP_UBYTE:
type_MIN = 0;
break;
case TYP_USHORT:
type_MIN = 0;
break;
case TYP_UINT: // Detected by checking for 32bit ....
case TYP_INT:
return false; // ... underflow same as done for TYP_INT
default:
NO_WAY("Bad type");
}
if (iterAtExit < type_MIN)
{
return true;
}
else
{
return false;
}
}
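// For example, a TYP_UBYTE iterator that would exit the loop with iterAtExit == -2 is
// below the type's minimum of 0, so the final decrement would actually have wrapped
// and this returns true.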
/*****************************************************************************
*
 *  Helper for loop unrolling - Computes the number of iterations
 *  of a constant loop. Returns false if it cannot prove that the iteration count is constant.
*/
bool Compiler::optComputeLoopRep(int constInit,
int constLimit,
int iterInc,
genTreeOps iterOper,
var_types iterOperType,
genTreeOps testOper,
bool unsTest,
bool dupCond,
unsigned* iterCount)
{
noway_assert(genActualType(iterOperType) == TYP_INT);
__int64 constInitX;
__int64 constLimitX;
unsigned loopCount;
int iterSign;
// Using this, we can just do a signed comparison with other 32 bit values.
if (unsTest)
{
constLimitX = (unsigned int)constLimit;
}
else
{
constLimitX = (signed int)constLimit;
}
switch (iterOperType)
{
        // For small types, the iteration operator will narrow these values, so mirror that narrowing here
#define INIT_ITER_BY_TYPE(type) \
constInitX = (type)constInit; \
iterInc = (type)iterInc;
case TYP_BYTE:
INIT_ITER_BY_TYPE(signed char);
break;
case TYP_UBYTE:
INIT_ITER_BY_TYPE(unsigned char);
break;
case TYP_SHORT:
INIT_ITER_BY_TYPE(signed short);
break;
case TYP_USHORT:
INIT_ITER_BY_TYPE(unsigned short);
break;
// For the big types, 32 bit arithmetic is performed
case TYP_INT:
case TYP_UINT:
if (unsTest)
{
constInitX = (unsigned int)constInit;
}
else
{
constInitX = (signed int)constInit;
}
break;
default:
noway_assert(!"Bad type");
NO_WAY("Bad type");
}
/* If iterInc is zero we have an infinite loop */
if (iterInc == 0)
{
return false;
}
/* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
iterSign = (iterInc > 0) ? +1 : -1;
/* Initialize loopCount to zero */
loopCount = 0;
    // If dupCond is true, then the loop head contains a test which skips
    // this loop if constInit does not pass the loop test; such a loop can
    // execute zero times.
    // If dupCond is false, then we have a true do-while loop: the body
    // always executes once before the loop test is performed.
if (!dupCond)
{
loopCount += 1;
constInitX += iterInc;
}
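    // For example, a do-while (dupCond == false) with constInit == 0 and iterInc == 1 has
    // already run its body once by the time the test first executes, so counting starts
    // from loopCount == 1 with constInitX advanced to 1.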
// bail if count is based on wrap-around math
if (iterInc > 0)
{
if (constLimitX < constInitX)
{
return false;
}
}
else if (constLimitX > constInitX)
{
return false;
}
/* Compute the number of repetitions */
switch (testOper)
{
__int64 iterAtExitX;
case GT_EQ:
/* something like "for (i=init; i == lim; i++)" doesn't make any sense */
return false;
case GT_NE:
/* "for (i=init; i != lim; i+=const)" - this is tricky since it may
* have a constant number of iterations or loop forever -
* we have to compute (lim-init) mod iterInc to see if it is zero.
* If mod iterInc is not zero then the limit test will miss an a wrap will occur
* which is probably not what the end user wanted, but it is legal.
*/
if (iterInc > 0)
{
/* Stepping by one, i.e. Mod with 1 is always zero */
if (iterInc != 1)
{
if (((constLimitX - constInitX) % iterInc) != 0)
{
return false;
}
}
}
else
{
noway_assert(iterInc < 0);
/* Stepping by -1, i.e. Mod with 1 is always zero */
if (iterInc != -1)
{
if (((constInitX - constLimitX) % (-iterInc)) != 0)
{
return false;
}
}
}
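            // Worked example: "for (i = 0; i != 10; i += 3)" gives (10 - 0) % 3 == 1, so
            // the test never hits 10 exactly, i wraps around, and we return false above;
            // "for (i = 0; i != 9; i += 3)" divides evenly and its count is computable.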
switch (iterOper)
{
case GT_SUB:
iterInc = -iterInc;
__fallthrough;
case GT_ADD:
if (constInitX != constLimitX)
{
loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
}
iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
if (unsTest)
{
iterAtExitX = (unsigned)iterAtExitX;
}
// Check if iteration incr will cause overflow for small types
if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
{
return false;
}
// iterator with 32bit overflow. Bad for TYP_(U)INT
if (iterAtExitX < constLimitX)
{
return false;
}
*iterCount = loopCount;
return true;
case GT_MUL:
case GT_DIV:
case GT_RSH:
case GT_LSH:
case GT_UDIV:
return false;
default:
noway_assert(!"Unknown operator for loop iterator");
return false;
}
case GT_LT:
switch (iterOper)
{
case GT_SUB:
iterInc = -iterInc;
__fallthrough;
case GT_ADD:
if (constInitX < constLimitX)
{
loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
}
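                    // Worked example: init == 0, limit == 10, iterInc == 3 (iterSign +1)
                    // gives loopCount += (10 - 0 - 1) / 3 + 1 == 4, matching the values
                    // i == 0, 3, 6, 9 that satisfy "i < 10".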
iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
if (unsTest)
{
iterAtExitX = (unsigned)iterAtExitX;
}
// Check if iteration incr will cause overflow for small types
if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
{
return false;
}
// iterator with 32bit overflow. Bad for TYP_(U)INT
if (iterAtExitX < constLimitX)
{
return false;
}
*iterCount = loopCount;
return true;
case GT_MUL:
case GT_DIV:
case GT_RSH:
case GT_LSH:
case GT_UDIV:
return false;
default:
noway_assert(!"Unknown operator for loop iterator");
return false;
}
case GT_LE:
switch (iterOper)
{
case GT_SUB:
iterInc = -iterInc;
__fallthrough;
case GT_ADD:
if (constInitX <= constLimitX)
{
loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
}
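                    // Worked example: the limit is inclusive here, so init == 0, limit == 9,
                    // iterInc == 3 gives loopCount += (9 - 0) / 3 + 1 == 4 (i == 0, 3, 6, 9),
                    // one more iteration than the equivalent "i < 9" loop.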
iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
if (unsTest)
{
iterAtExitX = (unsigned)iterAtExitX;
}
// Check if iteration incr will cause overflow for small types
if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
{
return false;
}
// iterator with 32bit overflow. Bad for TYP_(U)INT
if (iterAtExitX <= constLimitX)
{
return false;
}
*iterCount = loopCount;
return true;
case GT_MUL:
case GT_DIV:
case GT_RSH:
case GT_LSH:
case GT_UDIV:
return false;
default:
noway_assert(!"Unknown operator for loop iterator");
return false;
}
case GT_GT:
switch (iterOper)
{
case GT_SUB:
iterInc = -iterInc;
__fallthrough;
case GT_ADD:
if (constInitX > constLimitX)
{
loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
}
iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
if (unsTest)
{
iterAtExitX = (unsigned)iterAtExitX;
}
// Check if small types will underflow
if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
{
return false;
}
// iterator with 32bit underflow. Bad for TYP_INT and unsigneds
if (iterAtExitX > constLimitX)
{
return false;
}
*iterCount = loopCount;
return true;
case GT_MUL:
case GT_DIV:
case GT_RSH:
case GT_LSH:
case GT_UDIV:
return false;
default:
noway_assert(!"Unknown operator for loop iterator");
return false;
}
case GT_GE:
switch (iterOper)
{
case GT_SUB:
iterInc = -iterInc;
__fallthrough;
case GT_ADD:
if (constInitX >= constLimitX)
{
loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
}
iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
if (unsTest)
{
iterAtExitX = (unsigned)iterAtExitX;
}
// Check if small types will underflow
if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
{
return false;
}
// iterator with 32bit underflow. Bad for TYP_INT and unsigneds
if (iterAtExitX >= constLimitX)
{
return false;
}
*iterCount = loopCount;
return true;
case GT_MUL:
case GT_DIV:
case GT_RSH:
case GT_LSH:
case GT_UDIV:
return false;
default:
noway_assert(!"Unknown operator for loop iterator");
return false;
}
default:
noway_assert(!"Unknown operator for loop condition");
}
return false;
}
/*****************************************************************************
*
* Look for loop unrolling candidates and unroll them
*/
#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
void Compiler::optUnrollLoops()
{
if (compCodeOpt() == SMALL_CODE)
{
return;
}
if (optLoopCount == 0)
{
return;
}
#ifdef DEBUG
if (JitConfig.JitNoUnroll())
{
return;
}
#endif
#ifdef DEBUG
if (verbose)
{
printf("*************** In optUnrollLoops()\n");
}
#endif
/* Look for loop unrolling candidates */
bool change = false;
    // Visit loops from highest to lowest number, so that they are processed in
    // innermost-to-outermost order.
for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum)
{
// This is necessary due to an apparent analysis limitation since
// optLoopCount must be strictly greater than 0 upon entry and lnum
// cannot wrap due to the loop termination condition.
PREFAST_ASSUME(lnum != 0U - 1);
BasicBlock* block;
BasicBlock* head;
BasicBlock* bottom;
GenTree* loop;
GenTree* test;
GenTree* incr;
GenTree* phdr;
GenTree* init;
bool dupCond;
int lval;
int lbeg; // initial value for iterator
int llim; // limit value for iterator
unsigned lvar; // iterator lclVar #
int iterInc; // value to increment the iterator
genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.)
var_types iterOperType; // type result of the oper (for overflow instrs)
genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
bool unsTest; // Is the comparison u/int
unsigned loopRetCount; // number of BBJ_RETURN blocks in loop
unsigned totalIter; // total number of iterations in the constant loop
unsigned loopFlags; // actual lpFlags
unsigned requiredFlags; // required lpFlags
static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
10, // BLENDED_CODE
0, // SMALL_CODE
20, // FAST_CODE
0 // COUNT_OPT_CODE
};
noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
#ifdef DEBUG
if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
{
iterLimit *= 10;
}
#endif
static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
300, // BLENDED_CODE
0, // SMALL_CODE
600, // FAST_CODE
0 // COUNT_OPT_CODE
};
noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
loopFlags = optLoopTable[lnum].lpFlags;
// Check for required flags:
// LPFLG_DO_WHILE - required because this transform only handles loops of this form
// LPFLG_CONST - required because this transform only handles full unrolls
// LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons
requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT;
#ifdef DEBUG
if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
{
// In stress mode, quadruple the size limit, and drop
// the restriction that loop limit must be Vector<T>.Count.
unrollLimitSz *= 4;
requiredFlags &= ~LPFLG_SIMD_LIMIT;
}
#endif
/* Ignore the loop if we don't have a do-while
that has a constant number of iterations */
if ((loopFlags & requiredFlags) != requiredFlags)
{
continue;
}
/* ignore if removed or marked as not unrollable */
if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
{
continue;
}
head = optLoopTable[lnum].lpHead;
noway_assert(head);
bottom = optLoopTable[lnum].lpBottom;
noway_assert(bottom);
/* Get the loop data:
- initial constant
- limit constant
- iterator
- iterator increment
- increment operation type (i.e. ADD, SUB, etc...)
- loop test type (i.e. GT_GE, GT_LT, etc...)
*/
lbeg = optLoopTable[lnum].lpConstInit;
llim = optLoopTable[lnum].lpConstLimit();
testOper = optLoopTable[lnum].lpTestOper();
lvar = optLoopTable[lnum].lpIterVar();
iterInc = optLoopTable[lnum].lpIterConst();
iterOper = optLoopTable[lnum].lpIterOper();
iterOperType = optLoopTable[lnum].lpIterOperType();
unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
if (lvaTable[lvar].lvAddrExposed)
{ // If the loop iteration variable is address-exposed then bail
continue;
}
if (lvaTable[lvar].lvIsStructField)
{ // If the loop iteration variable is a promoted field from a struct then
// bail
continue;
}
/* Locate the pre-header and initialization and increment/test statements */
phdr = head->bbTreeList;
noway_assert(phdr);
loop = bottom->bbTreeList;
noway_assert(loop);
init = head->lastStmt();
noway_assert(init && (init->gtNext == nullptr));
test = bottom->lastStmt();
noway_assert(test && (test->gtNext == nullptr));
incr = test->gtPrev;
noway_assert(incr);
if (init->gtFlags & GTF_STMT_CMPADD)
{
/* Must be a duplicated loop condition */
noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
dupCond = true;
init = init->gtPrev;
noway_assert(init);
}
else
{
dupCond = false;
}
/* Find the number of iterations - the function returns false if not a constant number */
if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
{
continue;
}
/* Forget it if there are too many repetitions or not a constant loop */
if (totalIter > iterLimit)
{
continue;
}
noway_assert(init->gtOper == GT_STMT);
init = init->gtStmt.gtStmtExpr;
noway_assert(test->gtOper == GT_STMT);
test = test->gtStmt.gtStmtExpr;
noway_assert(incr->gtOper == GT_STMT);
incr = incr->gtStmt.gtStmtExpr;
// Don't unroll loops we don't understand.
if (incr->gtOper != GT_ASG)
{
continue;
}
incr = incr->gtOp.gtOp2;
/* Make sure everything looks ok */
if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
(init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
(init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
!((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
(incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
(incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
(test->gtOper != GT_JTRUE))
{
noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
continue;
}
/* heuristic - Estimated cost in code size of the unrolled loop */
{
ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration
block = head->bbNext;
auto tryIndex = block->bbTryIndex;
loopRetCount = 0;
for (;; block = block->bbNext)
{
if (block->bbTryIndex != tryIndex)
{
// Unrolling would require cloning EH regions
goto DONE_LOOP;
}
if (block->bbJumpKind == BBJ_RETURN)
{
++loopRetCount;
}
/* Visit all the statements in the block */
for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
{
/* Calculate gtCostSz */
gtSetStmtInfo(stmt);
/* Update loopCostSz */
loopCostSz += stmt->gtCostSz;
}
if (block == bottom)
{
break;
}
}
#ifdef JIT32_GCENCODER
if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX)
{
// Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs.
goto DONE_LOOP;
}
#endif // JIT32_GCENCODER
/* Compute the estimated increase in code size for the unrolled loop */
ClrSafeInt<unsigned> fixedLoopCostSz(8);
ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) -
ClrSafeInt<int>(loopCostSz + fixedLoopCostSz);
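            // Worked example (hypothetical sizes): totalIter == 4 and loopCostSz == 50 give
            // unrollCostSz = 50 * 4 - (50 + 8) == 142, which is within the BLENDED_CODE
            // limit of 300, so the unroll would proceed.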
/* Don't unroll if too much code duplication would result. */
if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz))
{
goto DONE_LOOP;
}
/* Looks like a good idea to unroll this loop, let's do it! */
CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
if (head->bbNext->bbNum != bottom->bbNum)
{
printf("..BB%02u", bottom->bbNum);
}
printf(" over V%02u from %u to %u", lvar, lbeg, llim);
printf(" unrollCostSz = %d\n", unrollCostSz);
printf("\n");
}
#endif
}
/* Create the unrolled loop statement list */
{
BlockToBlockMap blockMap(getAllocator());
BasicBlock* insertAfter = bottom;
for (lval = lbeg; totalIter; totalIter--)
{
for (block = head->bbNext;; block = block->bbNext)
{
BasicBlock* newBlock = insertAfter =
fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true);
blockMap.Set(block, newBlock);
if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval))
{
// cloneExpr doesn't handle everything
BasicBlock* oldBottomNext = insertAfter->bbNext;
bottom->bbNext = oldBottomNext;
oldBottomNext->bbPrev = bottom;
optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
goto DONE_LOOP;
}
// Block weight should no longer have the loop multiplier
newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);
// Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
assert(newBlock->bbJumpDest == nullptr);
if (block == bottom)
{
// Remove the test; we're doing a full unroll.
GenTreeStmt* testCopyStmt = newBlock->lastStmt();
GenTree* testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr;
assert(testCopyExpr->gtOper == GT_JTRUE);
GenTree* sideEffList = nullptr;
gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF);
if (sideEffList == nullptr)
{
fgRemoveStmt(newBlock, testCopyStmt);
}
else
{
testCopyStmt->gtStmt.gtStmtExpr = sideEffList;
}
newBlock->bbJumpKind = BBJ_NONE;
// Exit this loop; we've walked all the blocks.
break;
}
}
// Now redirect any branches within the newly-cloned iteration
for (block = head->bbNext; block != bottom; block = block->bbNext)
{
BasicBlock* newBlock = blockMap[block];
optCopyBlkDest(block, newBlock);
optRedirectBlock(newBlock, &blockMap);
}
/* update the new value for the unrolled iterator */
switch (iterOper)
{
case GT_ADD:
lval += iterInc;
break;
case GT_SUB:
lval -= iterInc;
break;
case GT_RSH:
case GT_LSH:
noway_assert(!"Unrolling not implemented for this loop iterator");
goto DONE_LOOP;
default:
noway_assert(!"Unknown operator for constant loop iterator");
goto DONE_LOOP;
}
}
// Gut the old loop body
for (block = head->bbNext;; block = block->bbNext)
{
block->bbTreeList = nullptr;
block->bbJumpKind = BBJ_NONE;
block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD);
            block->bbJumpDest = nullptr;
if (block == bottom)
{
break;
}
}
/* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
if (head->bbJumpKind == BBJ_COND)
{
phdr = head->bbTreeList;
noway_assert(phdr);
test = phdr->gtPrev;
noway_assert(test && (test->gtNext == nullptr));
noway_assert(test->gtOper == GT_STMT);
noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
init = test->gtPrev;
noway_assert(init && (init->gtNext == test));
noway_assert(init->gtOper == GT_STMT);
init->gtNext = nullptr;
phdr->gtPrev = init;
head->bbJumpKind = BBJ_NONE;
head->bbFlags &= ~BBF_NEEDS_GCPOLL;
}
else
{
/* the loop must execute */
noway_assert(head->bbJumpKind == BBJ_NONE);
}
#ifdef DEBUG
if (verbose)
{
printf("Whole unrolled loop:\n");