// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX XX
XX Code Generator Common: XX
XX Methods common to all architectures and register allocation strategies XX
XX XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
// TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
// identical, and which should probably be moved here.
#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "codegen.h"
#include "gcinfo.h"
#include "emit.h"
#ifndef JIT32_GCENCODER
#include "gcinfoencoder.h"
#endif
/*****************************************************************************/
const BYTE genTypeSizes[] = {
#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
#include "typelist.h"
#undef DEF_TP
};
const BYTE genTypeAlignments[] = {
#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
#include "typelist.h"
#undef DEF_TP
};
const BYTE genTypeStSzs[] = {
#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
#include "typelist.h"
#undef DEF_TP
};
const BYTE genActualTypes[] = {
#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
#include "typelist.h"
#undef DEF_TP
};
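// The four tables above use the X-macro pattern: each DEF_TP entry in typelist.h expands to
// one element per table, so genTypeSizes[], genTypeAlignments[], genTypeStSzs[] and
// genActualTypes[] are all indexed by a var_types value. As an illustrative sketch only
// (the authoritative data lives in typelist.h):
//     assert(genTypeSizes[TYP_INT] == 4);          // the 'sz' column for TYP_INT
//     assert(genActualTypes[TYP_BYTE] == TYP_INT); // small int types normalize to TYP_INT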
void CodeGenInterface::setFramePointerRequiredEH(bool value)
{
m_cgFramePointerRequired = value;
#ifndef JIT32_GCENCODER
if (value)
{
// EnumGcRefs will only enumerate slots in aborted frames
// if they are fully-interruptible. So if we have a catch
// or finally that will keep frame-vars alive, we need to
// force fully-interruptible.
CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
printf("Method has EH, marking method as fully interruptible\n");
}
#endif
m_cgInterruptible = true;
}
#endif // JIT32_GCENCODER
}
/*****************************************************************************/
CodeGenInterface* getCodeGenerator(Compiler* comp)
{
return new (comp, CMK_Codegen) CodeGen(comp);
}
// CodeGen constructor
CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
: gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
{
}
/*****************************************************************************/
CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
{
#if defined(_TARGET_XARCH_)
negBitmaskFlt = nullptr;
negBitmaskDbl = nullptr;
absBitmaskFlt = nullptr;
absBitmaskDbl = nullptr;
u8ToDblBitmask = nullptr;
#endif // defined(_TARGET_XARCH_)
#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
m_stkArgVarNum = BAD_VAR_NUM;
#endif
#if defined(UNIX_X86_ABI)
curNestedAlignment = 0;
maxNestedAlignment = 0;
#endif
gcInfo.regSet = &regSet;
m_cgEmitter = new (compiler->getAllocator()) emitter();
m_cgEmitter->codeGen = this;
m_cgEmitter->gcInfo = &gcInfo;
#ifdef DEBUG
setVerbose(compiler->verbose);
#endif // DEBUG
regSet.tmpInit();
instInit();
#ifdef LATE_DISASM
getDisAssembler().disInit(compiler);
#endif
#ifdef DEBUG
genTempLiveChg = true;
genTrnslLocalVarCount = 0;
// Shouldn't be used before it is set in genFnProlog()
compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);
#if defined(_TARGET_XARCH_)
// Shouldn't be used before it is set in genFnProlog()
compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
#endif // defined(_TARGET_XARCH_)
#endif // DEBUG
#ifdef _TARGET_AMD64_
// This will be set before final frame layout.
compiler->compVSQuirkStackPaddingNeeded = 0;
// Set to true if we perform the Quirk that fixes the PPP issue
compiler->compQuirkForPPPflag = false;
#endif // _TARGET_AMD64_
// Initialize the IP-mapping logic.
compiler->genIPmappingList = nullptr;
compiler->genIPmappingLast = nullptr;
compiler->genCallSite2ILOffsetMap = nullptr;
/* Assume that we are not fully interruptible */
genInterruptible = false;
#ifdef _TARGET_ARMARCH_
hasTailCalls = false;
#endif // _TARGET_ARMARCH_
#ifdef DEBUG
genInterruptibleUsed = false;
genCurDispOffset = (unsigned)-1;
#endif
}
void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg)
{
tree->gtRegNum = reg;
}
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
//---------------------------------------------------------------------
// genTotalFrameSize - return the "total" size of the stack frame, including local size
// and callee-saved register size. There are a few things "missing" depending on the
// platform. The function genCallerSPtoInitialSPdelta() includes those things.
//
// For ARM, this doesn't include the prespilled registers.
//
// For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
// It also doesn't include the pushed return address.
//
// Return value:
// Frame size
int CodeGenInterface::genTotalFrameSize()
{
assert(!IsUninitialized(compiler->compCalleeRegsPushed));
int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
assert(totalFrameSize >= 0);
return totalFrameSize;
}
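// Illustrative example (hypothetical numbers): on x86 with compCalleeRegsPushed == 3 and
// compLclFrameSize == 0x20, genTotalFrameSize() returns 3 * 4 + 0x20 == 0x2C. As noted above,
// this excludes the pushed return address and, if a frame pointer is used, the pushed EBP.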
//---------------------------------------------------------------------
// genSPtoFPdelta - return the offset from SP to the frame pointer.
// This number is going to be positive, since SP must be at the lowest
// address.
//
// There must be a frame pointer to call this function!
int CodeGenInterface::genSPtoFPdelta()
{
assert(isFramePointerUsed());
int delta;
delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
assert(delta >= 0);
return delta;
}
//---------------------------------------------------------------------
// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
// This number is going to be negative, since the Caller-SP is at a higher
// address than the frame pointer.
//
// There must be a frame pointer to call this function!
int CodeGenInterface::genCallerSPtoFPdelta()
{
assert(isFramePointerUsed());
int callerSPtoFPdelta = 0;
#if defined(_TARGET_ARM_)
// On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
#elif defined(_TARGET_X86_)
// Thanks to ebp chaining, the difference between ebp-based addresses
// and caller-SP-relative addresses is just the 2 pointers:
// return address
// pushed ebp
callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
#else
#error "Unknown _TARGET_"
#endif // _TARGET_*
assert(callerSPtoFPdelta <= 0);
return callerSPtoFPdelta;
}
//---------------------------------------------------------------------
// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
//
// This number will be negative.
int CodeGenInterface::genCallerSPtoInitialSPdelta()
{
int callerSPtoSPdelta = 0;
#if defined(_TARGET_ARM_)
callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
callerSPtoSPdelta -= genTotalFrameSize();
#elif defined(_TARGET_X86_)
callerSPtoSPdelta -= genTotalFrameSize();
callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
// compCalleeRegsPushed does not account for the frame pointer
// TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
if (isFramePointerUsed())
{
callerSPtoSPdelta -= REGSIZE_BYTES;
}
#else
#error "Unknown _TARGET_"
#endif // _TARGET_*
assert(callerSPtoSPdelta <= 0);
return callerSPtoSPdelta;
}
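// Worked example (x86, frame pointer used, hypothetical numbers): if genTotalFrameSize() == 0x2C,
//   genCallerSPtoFPdelta()        == -2 * REGSIZE_BYTES                        == -8
//   genCallerSPtoInitialSPdelta() == -(0x2C + 4 /*ret addr*/ + 4 /*saved EBP*/) == -0x34
//   genSPtoFPdelta()              == -(-0x34) + (-8)                            == 0x2C
// That is, on x86 the SP-to-FP delta equals the total frame size whenever a frame pointer is used.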
#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
/*****************************************************************************
* Should we round simple operations (assignments, arithmetic operations, etc.)?
*/
// inline
// static
bool CodeGen::genShouldRoundFP()
{
RoundLevel roundLevel = getRoundFloatLevel();
switch (roundLevel)
{
case ROUND_NEVER:
case ROUND_CMP_CONST:
case ROUND_CMP:
return false;
default:
assert(roundLevel == ROUND_ALWAYS);
return true;
}
}
/*****************************************************************************
*
* Initialize some global variables.
*/
void CodeGen::genPrepForCompiler()
{
treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler);
/* Figure out which non-register variables hold pointers */
VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
// Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
// in a register (i.e. they live on the stack for all or part of their lifetime).
// Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
unsigned varNum;
LclVarDsc* varDsc;
for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
{
if (varDsc->lvTracked || varDsc->lvIsRegCandidate())
{
if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc))
{
VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
}
}
}
VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
genLastLiveMask = RBM_NONE;
#ifdef DEBUG
compiler->fgBBcountAtCodegen = compiler->fgBBcount;
#endif
}
/*****************************************************************************
* To report exception handling information to the VM, we need the size of the exception
* handling regions. To compute that, we need to emit labels for the beginning block of
* an EH region, and the block that immediately follows a region. Go through the EH
* table and mark all these blocks with BBF_HAS_LABEL to make this happen.
*
* The beginning blocks of the EH regions should already have this flag set.
*
* No blocks should be added or removed after this.
*
* This code is closely coupled with genReportEH(): any block that this procedure
* determines needs a label must be selected using the same logic both here and in
* genReportEH(). So any time there is a change in the way we handle EH reporting,
* the logic of these two methods must be kept 'in sync'.
*/
void CodeGen::genPrepForEHCodegen()
{
assert(!compiler->fgSafeBasicBlockCreation);
EHblkDsc* HBtab;
EHblkDsc* HBtabEnd;
bool anyFinallys = false;
for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
HBtab < HBtabEnd; HBtab++)
{
assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
if (HBtab->ebdTryLast->bbNext != nullptr)
{
HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
}
if (HBtab->ebdHndLast->bbNext != nullptr)
{
HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
}
if (HBtab->HasFilter())
{
assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
// The block after the last block of the filter is
// the handler begin block, which we already asserted
// has BBF_HAS_LABEL set.
}
#if FEATURE_EH_CALLFINALLY_THUNKS
if (HBtab->HasFinallyHandler())
{
anyFinallys = true;
}
#endif // FEATURE_EH_CALLFINALLY_THUNKS
}
#if FEATURE_EH_CALLFINALLY_THUNKS
if (anyFinallys)
{
for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
{
if (block->bbJumpKind == BBJ_CALLFINALLY)
{
BasicBlock* bbToLabel = block->bbNext;
if (block->isBBCallAlwaysPair())
{
bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
}
if (bbToLabel != nullptr)
{
bbToLabel->bbFlags |= BBF_HAS_LABEL;
}
} // block is BBJ_CALLFINALLY
} // for each block
} // if (anyFinallys)
#endif // FEATURE_EH_CALLFINALLY_THUNKS
}
void CodeGenInterface::genUpdateLife(GenTree* tree)
{
treeLifeUpdater->UpdateLife(tree);
}
void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
{
compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
}
// Return the register mask for the given register variable
// inline
regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
{
regMaskTP regMask = RBM_NONE;
assert(varDsc->lvIsInReg());
if (varTypeIsFloating(varDsc->TypeGet()))
{
regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
}
else
{
regMask = genRegMask(varDsc->lvRegNum);
}
return regMask;
}
// Return the register mask for the given lclVar or regVar tree node
// inline
regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
{
assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_REG_VAR);
regMaskTP regMask = RBM_NONE;
const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
if (varDsc->lvPromoted)
{
for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
{
noway_assert(compiler->lvaTable[i].lvIsStructField);
if (compiler->lvaTable[i].lvIsInReg())
{
regMask |= genGetRegMask(&compiler->lvaTable[i]);
}
}
}
else if (varDsc->lvIsInReg())
{
regMask = genGetRegMask(varDsc);
}
return regMask;
}
// The given lclVar is either going live (being born) or dying.
// It might be both going live and dying (that is, it is a dead store) under MinOpts.
// Update regSet.rsMaskVars accordingly.
// inline
void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
{
regMaskTP regMask = genGetRegMask(varDsc);
#ifdef DEBUG
if (compiler->verbose)
{
printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
varDsc->PrintVarReg();
printf(" is becoming %s ", (isDying) ? "dead" : "live");
Compiler::printTreeID(tree);
printf("\n");
}
#endif // DEBUG
if (isDying)
{
// We'd like to be able to assert the following, however if we are walking
// through a qmark/colon tree, we may encounter multiple last-use nodes.
// assert((regSet.rsMaskVars & regMask) == regMask);
regSet.RemoveMaskVars(regMask);
}
else
{
assert((regSet.rsMaskVars & regMask) == 0);
regSet.AddMaskVars(regMask);
}
}
//----------------------------------------------------------------------
// compNoGCHelperCallKillSet:
//
// Gets a register mask that represents the kill set for a helper call.
// Not all JIT Helper calls follow the standard ABI on the target architecture.
//
// TODO-CQ: Currently this list is incomplete (not all helper calls are
// enumerated) and not 100% accurate (some killsets are larger than
// they really are).
// There's some work to be done in several places in the JIT to
// accurately track the registers that are getting killed by
// helper calls:
// a) LSRA needs several changes to accommodate more precise killsets
// for every helper call it sees (both explicitly [easy] and
// implicitly [hard])
// b) Currently for AMD64, when we generate code for a helper call
// we're independently over-pessimizing the killsets of the call
// (independently from LSRA) and this needs changes
// both in CodeGenAmd64.cpp and emitx86.cpp.
//
// The best solution for this problem would be to try to centralize
// the killset information in a single place but then make the
// corresponding changes so every code generation phase is in sync
// about this.
//
// The interim solution is to only add known helper calls that don't
// follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
//
// Arguments:
// helper - The helper being inquired about
//
// Return Value:
// Mask of register kills -- registers whose values are no longer guaranteed to be the same.
//
regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
{
switch (helper)
{
case CORINFO_HELP_ASSIGN_BYREF:
#if defined(_TARGET_AMD64_)
return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC;
#elif defined(_TARGET_ARMARCH_)
return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
#elif defined(_TARGET_X86_)
return RBM_ESI | RBM_EDI | RBM_ECX;
#else
NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
return RBM_CALLEE_TRASH;
#endif
#if defined(_TARGET_ARMARCH_)
case CORINFO_HELP_ASSIGN_REF:
case CORINFO_HELP_CHECKED_ASSIGN_REF:
return RBM_CALLEE_TRASH_WRITEBARRIER;
#endif
case CORINFO_HELP_PROF_FCN_ENTER:
#ifdef RBM_PROFILER_ENTER_TRASH
return RBM_PROFILER_ENTER_TRASH;
#else
NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
#endif
case CORINFO_HELP_PROF_FCN_LEAVE:
#ifdef RBM_PROFILER_LEAVE_TRASH
return RBM_PROFILER_LEAVE_TRASH;
#else
NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
#endif
case CORINFO_HELP_PROF_FCN_TAILCALL:
#ifdef RBM_PROFILER_TAILCALL_TRASH
return RBM_PROFILER_TAILCALL_TRASH;
#else
NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
#endif
#ifdef _TARGET_X86_
case CORINFO_HELP_ASSIGN_REF_EAX:
case CORINFO_HELP_ASSIGN_REF_ECX:
case CORINFO_HELP_ASSIGN_REF_EBX:
case CORINFO_HELP_ASSIGN_REF_EBP:
case CORINFO_HELP_ASSIGN_REF_ESI:
case CORINFO_HELP_ASSIGN_REF_EDI:
case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
return RBM_EDX;
#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
case CORINFO_HELP_ASSIGN_REF:
case CORINFO_HELP_CHECKED_ASSIGN_REF:
return RBM_EAX | RBM_EDX;
#endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS
#endif
case CORINFO_HELP_STOP_FOR_GC:
return RBM_STOP_FOR_GC_TRASH;
case CORINFO_HELP_INIT_PINVOKE_FRAME:
return RBM_INIT_PINVOKE_FRAME_TRASH;
default:
return RBM_CALLEE_TRASH;
}
}
//----------------------------------------------------------------------
// compNoGCHelperCallKillSet: Gets a register mask that represents the set of registers that no longer
// contain GC or byref pointers, for "NO GC" helper calls. This is used by the emitter when determining
// what registers to remove from the current live GC/byref sets (and thus what to report as dead in the
// GC info). Note that for the CORINFO_HELP_ASSIGN_BYREF helper, in particular, the kill set reported by
// compHelperCallKillSet() doesn't match this kill set. compHelperCallKillSet() reports the dst/src
// address registers as killed for liveness purposes, since their values change. However, they still are
// valid byref pointers after the call, so the dst/src address registers are NOT reported as killed here.
//
// Note: This list may not be complete; helpers not listed here default to the RBM_CALLEE_TRASH_NOGC registers.
//
// Arguments:
// helper - The helper being inquired about
//
// Return Value:
// Mask of GC register kills
//
regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
{
assert(emitter::emitNoGChelper(helper));
switch (helper)
{
#if defined(_TARGET_XARCH_)
case CORINFO_HELP_PROF_FCN_ENTER:
return RBM_PROFILER_ENTER_TRASH;
case CORINFO_HELP_PROF_FCN_LEAVE:
return RBM_PROFILER_LEAVE_TRASH;
case CORINFO_HELP_PROF_FCN_TAILCALL:
return RBM_PROFILER_TAILCALL_TRASH;
#endif // defined(_TARGET_XARCH_)
#if defined(_TARGET_X86_)
case CORINFO_HELP_ASSIGN_BYREF:
// This helper only trashes ECX.
return RBM_ECX;
#endif // defined(_TARGET_X86_)
#if defined(_TARGET_ARMARCH_)
case CORINFO_HELP_ASSIGN_BYREF:
return RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF;
case CORINFO_HELP_ASSIGN_REF:
case CORINFO_HELP_CHECKED_ASSIGN_REF:
return RBM_CALLEE_GCTRASH_WRITEBARRIER;
#endif
#if defined(_TARGET_X86_)
case CORINFO_HELP_INIT_PINVOKE_FRAME:
return RBM_INIT_PINVOKE_FRAME_TRASH;
#endif // defined(_TARGET_X86_)
default:
return RBM_CALLEE_TRASH_NOGC;
}
}
template <bool ForCodeGen>
void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
{
LclVarDsc* varDsc;
#ifdef DEBUG
if (verbose)
{
printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
dumpConvertedVarSet(this, compCurLife);
printf(" -> %s ", VarSetOps::ToString(this, newLife));
dumpConvertedVarSet(this, newLife);
printf("\n");
}
#endif // DEBUG
/* We should only be called when the live set has actually changed */
noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
if (!ForCodeGen)
{
VarSetOps::Assign(this, compCurLife, newLife);
return;
}
/* Figure out which variables are becoming live/dead at this point */
// deadSet = compCurLife - newLife
VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
// bornSet = newLife - compCurLife
VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
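// For example, if compCurLife == {V01, V02} and newLife == {V02, V03}, then
// deadSet == {V01} (going dead) and bornSet == {V03} (becoming live); V02 is unaffected.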
/* A variable can't become both live and dead at the same time */
// (deadSet UNION bornSet) != EMPTY
noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
// (deadSet INTERSECTION bornSet) == EMPTY
noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
VarSetOps::Assign(this, compCurLife, newLife);
// Handle the dying vars first, then the newly live vars.
// This is because, in the RyuJIT backend case, they may occupy registers that
// will be occupied by another var that is newly live.
VarSetOps::Iter deadIter(this, deadSet);
unsigned deadVarIndex = 0;
while (deadIter.NextElem(&deadVarIndex))
{
unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
varDsc = lvaTable + varNum;
bool isGCRef = (varDsc->TypeGet() == TYP_REF);
bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
if (varDsc->lvIsInReg())
{
// TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
// gc sets
regMaskTP regMask = varDsc->lvRegMask();
if (isGCRef)
{
codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
}
else if (isByRef)
{
codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
}
codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
}
// This isn't in a register, so update the gcVarPtrSetCur.
else if (isGCRef || isByRef)
{
VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
}
}
VarSetOps::Iter bornIter(this, bornSet);
unsigned bornVarIndex = 0;
while (bornIter.NextElem(&bornVarIndex))
{
unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
varDsc = lvaTable + varNum;
bool isGCRef = (varDsc->TypeGet() == TYP_REF);
bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
if (varDsc->lvIsInReg())
{
#ifdef DEBUG
if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
{
JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
}
#endif // DEBUG
VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
regMaskTP regMask = varDsc->lvRegMask();
if (isGCRef)
{
codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
}
else if (isByRef)
{
codeGen->gcInfo.gcRegByrefSetCur |= regMask;
}
}
// This isn't in a register, so update the gcVarPtrSetCur
else if (lvaIsGCTracked(varDsc))
{
VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
}
}
codeGen->siUpdate();
}
// Need an explicit instantiation.
template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
/*****************************************************************************
*
* Generate a spill.
*/
void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
{
getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
}
/*****************************************************************************
*
* Generate a reload.
*/
void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
{
getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
}
// inline
regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
{
return REG_ARG_0;
}
//----------------------------------------------------------------------
// getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
//
// Arguments:
// tree - spilled GenTree node
//
// Return Value:
// TempDsc corresponding to tree
TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
{
// tree must be in spilled state.
assert((tree->gtFlags & GTF_SPILLED) != 0);
// Get the tree's SpillDsc.
RegSet::SpillDsc* prevDsc;
RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
assert(spillDsc != nullptr);
// Get the temp desc.
TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
return temp;
}
#ifdef _TARGET_XARCH_
#ifdef _TARGET_AMD64_
// Returns relocation type hint for an addr.
// Note that there are no reloc hints on x86.
//
// Arguments
// addr - data address
//
// Returns
// relocation type hint
//
unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
{
return compiler->eeGetRelocTypeHint((void*)addr);
}
#endif //_TARGET_AMD64_
// Return true if an absolute indirect data address can be encoded as an IP-relative
// offset. Note that this method should be used only when the caller knows that
// the address is an icon value that VM has given and there is no GenTree node
// representing it. Otherwise, one should always use FitsInAddrBase().
//
// Arguments
// addr - an absolute indirect data address
//
// Returns
// true if indir data addr could be encoded as IP-relative offset.
//
bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
{
#ifdef _TARGET_AMD64_
return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
#else
// x86: PC-relative addressing is available only for control flow instructions (jmp and call)
return false;
#endif
}
// Return true if an indirect code address can be encoded as IP-relative offset.
// Note that this method should be used only when the caller knows that the
// address is an icon value that VM has given and there is no GenTree node
// representing it. Otherwise, one should always use FitsInAddrBase().
//
// Arguments
// addr - an absolute indirect code address
//
// Returns
// true if indir code addr could be encoded as IP-relative offset.
//
bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
{
#ifdef _TARGET_AMD64_
return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
#else
// x86: PC-relative addressing is available only for control flow instructions (jmp and call)
return true;
#endif
}
// Return true if an indirect code address can be encoded as 32-bit displacement
// relative to zero. Note that this method should be used only when the caller
// knows that the address is an icon value that VM has given and there is no
// GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
//
// Arguments
// addr - absolute indirect code address
//
// Returns
// true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
//
bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
{
return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
}
// Return true if an absolute indirect code address needs a relocation recorded with VM.
//
// Arguments
// addr - an absolute indirect code address
//
// Returns
// true if indir code addr needs a relocation recorded with VM
//
bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
{
// If generating relocatable ngen code, then all code addr should go through relocation
if (compiler->opts.compReloc)
{
return true;
}
#ifdef _TARGET_AMD64_
// If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation.
if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr))
{
return true;
}
// It could be possible that the code indir addr could be encoded as 32-bit displacement relative
// to zero. But we don't need to emit a relocation in that case.
return false;
#else //_TARGET_X86_
// On x86 there is no need for recording relocations during jitting,
// because all addrs fit within 32-bits.
return false;
#endif //_TARGET_X86_
}
// Return true if a direct code address needs to be marked as relocatable.
//
// Arguments
// addr - absolute direct code address
//
// Returns
// true if direct code addr needs a relocation recorded with VM
//
bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
{
// If generating relocatable ngen code, then all code addr should go through relocation
if (compiler->opts.compReloc)
{
return true;
}
#ifdef _TARGET_AMD64_
// By default all direct code addresses go through relocation so that VM will setup
// a jump stub if addr cannot be encoded as pc-relative offset.
return true;
#else //_TARGET_X86_
// On x86 there is no need for recording relocations during jitting,
// because all addrs fit within 32-bits.
return false;
#endif //_TARGET_X86_
}
#endif //_TARGET_XARCH_
/*****************************************************************************
*
* The following can be used to create basic blocks that serve as labels for
* the emitter. Use with caution - these are not real basic blocks!
*
*/
// inline
BasicBlock* CodeGen::genCreateTempLabel()
{
#ifdef DEBUG
// These blocks don't affect FP
compiler->fgSafeBasicBlockCreation = true;
#endif
BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
#ifdef DEBUG
compiler->fgSafeBasicBlockCreation = false;
#endif
block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
// Use coldness of current block, as this label will
// be contained in it.
block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
#ifdef DEBUG
#ifdef UNIX_X86_ABI
block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
#else
block->bbTgtStkDepth = genStackLevel / sizeof(int);
#endif
#endif
return block;
}
// inline
void CodeGen::genDefineTempLabel(BasicBlock* label)
{
#ifdef DEBUG
if (compiler->opts.dspCode)
{
printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, label->bbNum);
}
#endif
label->bbEmitCookie =
getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
}
/*****************************************************************************
*
* Adjust the stack pointer by the given value; assumes that this follows
* a call so only callee-saved registers (and registers that may hold a
* return value) are used at this point.
*/
void CodeGen::genAdjustSP(ssize_t delta)
{
#if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
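// Size optimization: a 4-byte adjustment can be done with a shorter "pop ecx" encoding;
// the popped value is simply discarded (ECX is neither callee-saved nor a return register,
// so per the comment above it is dead at this point).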
if (delta == sizeof(int))
inst_RV(INS_pop, REG_ECX, TYP_INT);
else
#endif
inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
}
//------------------------------------------------------------------------
// genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
//
// Arguments:
// block - The BasicBlock for which we are about to generate code.
//
// Assumptions:
// Must be called just prior to generating code for 'block'.
//
// Notes:
// This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
// and if 'block' is a throw helper block with a non-zero stack level.
void CodeGen::genAdjustStackLevel(BasicBlock* block)
{
#if !FEATURE_FIXED_OUT_ARGS
// Check for inserted throw blocks and adjust genStackLevel.
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(UNIX_X86_ABI)
if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
{
// x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
// at this point if a jump to this block is made in the middle of pushing arguments.
//
// Here we restore SP to prevent potential stack alignment issues.
getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
}
#endif
if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
{
noway_assert(block->bbFlags & BBF_JMP_TARGET);
SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
if (genStackLevel != 0)
{
#ifdef _TARGET_X86_
getEmitter()->emitMarkStackLvl(genStackLevel);
inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
SetStackLevel(0);
#else // _TARGET_X86_
NYI("Need emitMarkStackLvl()");
#endif // _TARGET_X86_
}
}
#endif // !FEATURE_FIXED_OUT_ARGS
}
#ifdef _TARGET_ARMARCH_
// return size
// alignmentWB is out param
unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB)
{
unsigned alignment = 0;
unsigned opSize = 0;
if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
{
opSize = InferStructOpSizeAlign(op, &alignment);
}
else
{
alignment = genTypeAlignments[op->TypeGet()];
opSize = genTypeSizes[op->TypeGet()];
}
assert(opSize != 0);
assert(alignment != 0);
(*alignmentWB) = alignment;
return opSize;
}
// return size
// alignmentWB is out param
unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB)
{
unsigned alignment = 0;
unsigned opSize = 0;
while (op->gtOper == GT_COMMA)
{
op = op->gtOp.gtOp2;
}
if (op->gtOper == GT_OBJ)
{
CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
alignment =
(unsigned)roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
}
else if (op->gtOper == GT_LCL_VAR)
{
unsigned varNum = op->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = compiler->lvaTable + varNum;
assert(varDsc->lvType == TYP_STRUCT);
opSize = varDsc->lvSize();
#ifndef _TARGET_64BIT_
if (varDsc->lvStructDoubleAlign)
{
alignment = TARGET_POINTER_SIZE * 2;
}
else
#endif // !_TARGET_64BIT_
{
alignment = TARGET_POINTER_SIZE;
}
}
else if (op->OperIsCopyBlkOp())
{
GenTree* op2 = op->gtOp.gtOp2;
if (op2->OperGet() == GT_CNS_INT)
{
if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
{
CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
opSize = (unsigned)roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
alignment = (unsigned)roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd),
TARGET_POINTER_SIZE);
}
else
{
opSize = (unsigned)op2->gtIntCon.gtIconVal;
GenTree* op1 = op->gtOp.gtOp1;
assert(op1->OperGet() == GT_LIST);
GenTree* dstAddr = op1->gtOp.gtOp1;
if (dstAddr->OperGet() == GT_ADDR)
{
InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
}
else
{
assert(!"Unhandled dstAddr node");
alignment = TARGET_POINTER_SIZE;
}
}
}
else
{
noway_assert(!"Variable sized COPYBLK register arg!");
opSize = 0;
alignment = TARGET_POINTER_SIZE;
}
}
else if (op->gtOper == GT_MKREFANY)
{
opSize = TARGET_POINTER_SIZE * 2;
alignment = TARGET_POINTER_SIZE;
}
else if (op->IsArgPlaceHolderNode())
{
CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
assert(clsHnd != 0);
opSize = (unsigned)roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
alignment =
(unsigned)roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
}
else
{
assert(!"Unhandled gtOper");
opSize = TARGET_POINTER_SIZE;
alignment = TARGET_POINTER_SIZE;
}
assert(opSize != 0);
assert(alignment != 0);
(*alignmentWB) = alignment;
return opSize;
}
#endif // _TARGET_ARMARCH_
/*****************************************************************************
*
* Take an address expression and try to find the best set of components to
* form an address mode; returns non-zero if this is successful.
*
* TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
* Refactor this code so that the underlying analysis can be used in
* the RyuJIT Backend to do lowering, instead of having to call this method with the
* option to not generate the code.
*
* 'fold' specifies if it is OK to fold the array index which hangs off
* a GT_NOP node.
*
* If successful, the parameters will be set to the following values:
*
* *rv1Ptr ... base operand
* *rv2Ptr ... optional operand
* *revPtr ... true if rv2 is before rv1 in the evaluation order
* #if SCALED_ADDR_MODES
* *mulPtr ... optional multiplier (2/4/8) for rv2
* Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
* #endif
* *cnsPtr ... integer constant [optional]
*
* IMPORTANT NOTE: This routine doesn't generate any code, it merely
* identifies the components that might be used to
* form an address mode later on.
*/
bool CodeGen::genCreateAddrMode(GenTree* addr,
bool fold,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
#if SCALED_ADDR_MODES
unsigned* mulPtr,
#endif // SCALED_ADDR_MODES
ssize_t* cnsPtr)
{
/*
The following indirections are valid address modes on x86/x64:
[ icon] * not handled here
[reg ]
[reg + icon]
[reg1 + reg2 ]
[reg1 + reg2 + icon]
[reg1 + 2 * reg2 ]
[reg1 + 4 * reg2 ]
[reg1 + 8 * reg2 ]
[ 2 * reg2 + icon]
[ 4 * reg2 + icon]
[ 8 * reg2 + icon]
[reg1 + 2 * reg2 + icon]
[reg1 + 4 * reg2 + icon]
[reg1 + 8 * reg2 + icon]
The following indirections are valid address modes on arm64:
[reg]
[reg + icon]
[reg1 + reg2]
[reg1 + reg2 * natural-scale]
*/
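/* Illustrative example (hypothetical tree): given "GT_ADD(GT_LCL_VAR V01, GT_CNS_INT 16)",
this routine reports rv1 = the V01 node, rv2 = nullptr, mul = 0, cns = 16, and rev = false,
i.e. the address mode [V01 + 16]. */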
/* All indirect address modes require the address to be an addition */
if (addr->gtOper != GT_ADD)
{
return false;
}
// Can't use indirect addressing mode as we need to check for overflow.
// Also, can't use 'lea' as it doesn't set the flags.
if (addr->gtOverflow())
{
return false;
}
GenTree* rv1 = nullptr;
GenTree* rv2 = nullptr;
GenTree* op1;
GenTree* op2;
ssize_t cns;
#if SCALED_ADDR_MODES
unsigned mul;
#endif // SCALED_ADDR_MODES
GenTree* tmp;
/* What order are the sub-operands to be evaluated */
if (addr->gtFlags & GTF_REVERSE_OPS)
{
op1 = addr->gtOp.gtOp2;
op2 = addr->gtOp.gtOp1;
}
else
{
op1 = addr->gtOp.gtOp1;
op2 = addr->gtOp.gtOp2;
}
bool rev = false; // Is op2 first in the evaluation order?
/*
A complex address mode can combine the following operands:
op1 ... base address
op2 ... optional scaled index
#if SCALED_ADDR_MODES
mul ... optional multiplier (2/4/8) for op2
#endif
cns ... optional displacement
Here we try to find such a set of operands and arrange for these
to sit in registers.
*/
cns = 0;
#if SCALED_ADDR_MODES
mul = 0;
#endif // SCALED_ADDR_MODES
AGAIN:
/* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
here if we find a scaled index.
*/
CLANG_FORMAT_COMMENT_ANCHOR;
#if SCALED_ADDR_MODES
assert(mul == 0);
#endif // SCALED_ADDR_MODES
/* Special case: keep constants as 'op2' */
if (op1->IsCnsIntOrI())
{
// Presumably op2 is not also a constant (shouldn't happen if we've done constant folding)?
tmp = op1;
op1 = op2;
op2 = tmp;
}
/* Check for an addition of a constant */
if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
{
/* We're adding a constant */
cns += op2->gtIntConCommon.IconValue();
#if defined(_TARGET_ARMARCH_)
if (cns == 0)
#endif
{
/* Inspect the operand the constant is being added to */
switch (op1->gtOper)
{
case GT_ADD:
if (op1->gtOverflow())
{
break;
}
op2 = op1->gtOp.gtOp2;
op1 = op1->gtOp.gtOp1;
goto AGAIN;
#if SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
case GT_MUL:
if (op1->gtOverflow())
{
return false; // Need overflow check
}
__fallthrough;
case GT_LSH:
mul = op1->GetScaledIndex();
if (mul)
{
/* We can use "[mul*rv2 + icon]" */
rv1 = nullptr;
rv2 = op1->gtOp.gtOp1;
goto FOUND_AM;
}
break;
#endif // SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
default:
break;
}
}
/* The best we can do is "[rv1 + icon]" */
rv1 = op1;
rv2 = nullptr;
goto FOUND_AM;
}
// op2 is not a constant. So keep on trying.
/* Neither op1 nor op2 are sitting in a register right now */
switch (op1->gtOper)
{
#if !defined(_TARGET_ARMARCH_)
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
case GT_ADD:
if (op1->gtOverflow())
{
break;
}
if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
{
cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
op1 = op1->gtOp.gtOp1;
goto AGAIN;
}
break;
#if SCALED_ADDR_MODES
case GT_MUL:
if (op1->gtOverflow())
{
break;
}
__fallthrough;
case GT_LSH:
mul = op1->GetScaledIndex();
if (mul)
{
/* 'op1' is a scaled value */
rv1 = op2;
rv2 = op1->gtOp.gtOp1;
int argScale;
while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
{
if (jitIsScaleIndexMul(argScale * mul))
{
mul = mul * argScale;
rv2 = rv2->gtOp.gtOp1;
}
else
{
break;
}
}
noway_assert(rev == false);
rev = true;
goto FOUND_AM;
}
break;
#endif // SCALED_ADDR_MODES
#endif // !_TARGET_ARMARCH
case GT_NOP:
op1 = op1->gtOp.gtOp1;
goto AGAIN;
case GT_COMMA:
op1 = op1->gtOp.gtOp2;
goto AGAIN;
default:
break;
}
noway_assert(op2);
switch (op2->gtOper)
{
#if !defined(_TARGET_ARMARCH_)
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
case GT_ADD:
if (op2->gtOverflow())
{
break;
}
if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
{
cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
op2 = op2->gtOp.gtOp1;
goto AGAIN;
}
break;
#if SCALED_ADDR_MODES
case GT_MUL:
if (op2->gtOverflow())
{
break;
}
__fallthrough;
case GT_LSH:
mul = op2->GetScaledIndex();
if (mul)
{
// 'op2' is a scaled value... is its argument also scaled?
int argScale;
rv2 = op2->gtOp.gtOp1;
while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
{
if (jitIsScaleIndexMul(argScale * mul))
{
mul = mul * argScale;
rv2 = rv2->gtOp.gtOp1;
}
else
{
break;
}
}
rv1 = op1;
goto FOUND_AM;
}
break;
#endif // SCALED_ADDR_MODES
#endif // !_TARGET_ARMARCH
case GT_NOP:
op2 = op2->gtOp.gtOp1;
goto AGAIN;
case GT_COMMA:
op2 = op2->gtOp.gtOp2;
goto AGAIN;
default:
break;
}
/* The best we can do "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
rv1 = op1;
rv2 = op2;
#ifdef _TARGET_ARM64_
assert(cns == 0);
#endif
FOUND_AM:
if (rv2)
{
/* Make sure a GC address doesn't end up in 'rv2' */
if (varTypeIsGC(rv2->TypeGet()))
{
noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
tmp = rv1;
rv1 = rv2;
rv2 = tmp;
rev = !rev;
}
/* Special case: constant array index (that is range-checked) */
if (fold)
{
ssize_t tmpMul;
GenTree* index;
if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
{
/* For valuetype arrays where we can't use the scaled address
mode, rv2 will point to the scaled index. So we have to do
more work */
tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
if (mul)
{
tmpMul *= mul;
}
}
else
{
/* May be a simple array. rv2 will point to the actual index */
index = rv2;
tmpMul = mul;
}
/* Get hold of the array index and see if it's a constant */
if (index->IsIntCnsFitsInI32())
{
/* Get hold of the index value */
ssize_t ixv = index->AsIntConCommon()->IconValue();
#if SCALED_ADDR_MODES
/* Scale the index if necessary */
if (tmpMul)
{
ixv *= tmpMul;
}
#endif
if (FitsIn<INT32>(cns + ixv))
{
/* Add the scaled index to the offset value */
cns += ixv;
#if SCALED_ADDR_MODES
/* There is no scaled operand any more */
mul = 0;
#endif
rv2 = nullptr;
}
}
}
}
// We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
noway_assert(rv1 || mul != 1);
noway_assert(FitsIn<INT32>(cns));
if (rv1 == nullptr && rv2 == nullptr)
{
return false;
}
/* Success - return the various components to the caller */
*revPtr = rev;
*rv1Ptr = rv1;
*rv2Ptr = rv2;
#if SCALED_ADDR_MODES
*mulPtr = mul;
#endif
*cnsPtr = cns;
return true;
}
/*****************************************************************************
* The condition to use for (the jmp/set for) the given type of operation
*
* In case of amd64, this routine should be used when there is no gentree available
* and one needs to generate jumps based on integer comparisons. When gentree is
* available always use its overloaded version.
*
*/
// static
emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
{
const static BYTE genJCCinsSigned[] = {
#if defined(_TARGET_XARCH_)
EJ_je, // GT_EQ
EJ_jne, // GT_NE
EJ_jl, // GT_LT
EJ_jle, // GT_LE
EJ_jge, // GT_GE
EJ_jg, // GT_GT
EJ_je, // GT_TEST_EQ
EJ_jne, // GT_TEST_NE
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ
EJ_ne, // GT_NE
EJ_lt, // GT_LT
EJ_le, // GT_LE
EJ_ge, // GT_GE
EJ_gt, // GT_GT
#if defined(_TARGET_ARM64_)
EJ_eq, // GT_TEST_EQ
EJ_ne, // GT_TEST_NE
#endif
#endif
};
const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
{
#if defined(_TARGET_XARCH_)
EJ_je, // GT_EQ
EJ_jne, // GT_NE
EJ_jb, // GT_LT
EJ_jbe, // GT_LE
EJ_jae, // GT_GE
EJ_ja, // GT_GT
EJ_je, // GT_TEST_EQ
EJ_jne, // GT_TEST_NE
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ
EJ_ne, // GT_NE
EJ_lo, // GT_LT
EJ_ls, // GT_LE
EJ_hs, // GT_GE
EJ_hi, // GT_GT
#if defined(_TARGET_ARM64_)
EJ_eq, // GT_TEST_EQ
EJ_ne, // GT_TEST_NE
#endif
#endif
};
const static BYTE genJCCinsLogical[] = /* logical operation */
{
#if defined(_TARGET_XARCH_)
EJ_je, // GT_EQ (Z == 1)
EJ_jne, // GT_NE (Z == 0)
EJ_js, // GT_LT (S == 1)
EJ_NONE, // GT_LE
EJ_jns, // GT_GE (S == 0)
EJ_NONE, // GT_GT
EJ_NONE, // GT_TEST_EQ
EJ_NONE, // GT_TEST_NE
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ (Z == 1)
EJ_ne, // GT_NE (Z == 0)
EJ_mi, // GT_LT (N == 1)
EJ_NONE, // GT_LE
EJ_pl, // GT_GE (N == 0)
EJ_NONE, // GT_GT
#if defined(_TARGET_ARM64_)
EJ_eq, // GT_TEST_EQ
EJ_ne, // GT_TEST_NE
#endif
#endif
};
#if defined(_TARGET_XARCH_)
assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
#elif defined(_TARGET_ARMARCH_)
assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
#else
assert(!"unknown arch");
#endif
assert(GenTree::OperIsCompare(cmp));
emitJumpKind result = EJ_COUNT;
if (compareKind == CK_UNSIGNED)
{
result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
}
else if (compareKind == CK_SIGNED)
{
result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
}
else if (compareKind == CK_LOGICAL)
{
result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
}
assert(result != EJ_COUNT);
return result;
}
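// For example, genJumpKindForOper(GT_LT, CK_SIGNED) yields EJ_jl on xarch (EJ_lt on arm),
// while genJumpKindForOper(GT_LT, CK_UNSIGNED) yields EJ_jb on xarch (EJ_lo on arm),
// reflecting the signed vs. unsigned condition-code split in the tables above.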
#ifdef _TARGET_ARMARCH_
//------------------------------------------------------------------------
// genEmitGSCookieCheck: Generate code to check that the GS cookie
// wasn't trashed by a buffer overrun. Common code for ARM32 and ARM64.
//
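// For illustration, the sequence emitted below is roughly the following (register names and
// the label are placeholders; in the ngen case the cookie constant is instead loaded through
// an indirection):
//      mov  regGSConst, #gsGlobalSecurityCookieVal
//      ldr  regGSValue, [frame local lvaGSSecurityCookie]
//      cmp  regGSConst, regGSValue
//      beq  L_cookieOk
//      bl   CORINFO_HELP_FAIL_FAST
//   L_cookieOk: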
void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
// Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
// executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
if (!pushReg && (compiler->info.compRetType == TYP_REF))
gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
// We need two temporary registers, to load the GS cookie values and compare them. We can't use
// any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be
// callee-trash registers, which should not contain anything interesting at this point.
// We don't have any IR node representing this check, so LSRA can't communicate registers
// for us to use.
regNumber regGSConst = REG_GSCOOKIE_TMP_0;
regNumber regGSValue = REG_GSCOOKIE_TMP_1;
if (compiler->gsGlobalSecurityCookieAddr == nullptr)
{
// load the GS cookie constant into a reg
//
genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
}
else
{
// Ngen case - GS cookie constant needs to be accessed through an indirection.
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
}
// Load this method's GS value from the stack frame
getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
// Compare with the GS cookie constant
getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
BasicBlock* gsCheckBlk = genCreateTempLabel();
emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
inst_JMP(jmpEqual, gsCheckBlk);
// regGSConst and regGSValue aren't needed anymore, we can use them for helper call
genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
genDefineTempLabel(gsCheckBlk);
}
#endif // _TARGET_ARMARCH_
/*****************************************************************************
*
* Generate an exit sequence for a return from a method (note: when compiling
* for speed there might be multiple exit points).
*/
void CodeGen::genExitCode(BasicBlock* block)
{
/* Just wrote the first instruction of the epilog - inform debugger
Note that this may result in a duplicate IPmapping entry, and
that this is ok */
// For non-optimized debuggable code, there is only one epilog.
genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
if (compiler->getNeedsGSSecurityCookie())
{
genEmitGSCookieCheck(jmpEpilog);
if (jmpEpilog)
{
// Dev10 642944 -
// The GS cookie check created a temp label that has no live
// incoming GC registers, we need to fix that
unsigned varNum;
LclVarDsc* varDsc;
/* Figure out which register parameters hold pointers */
for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
varNum++, varDsc++)
{
noway_assert(varDsc->lvIsParam);
gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
}
getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
}
}
genReserveEpilog(block);
}
//------------------------------------------------------------------------
// genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
//
// Notes:
// For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
// Otherwise, we generate the 'throw' inline.
//
// Arguments:
// jumpKind - jump kind to generate;
// codeKind - the special throw-helper kind;
// failBlk - optional fail target block, if it is already known;
//
void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTree* failBlk)
{
bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
#if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS
// Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
#endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
if (useThrowHlpBlk)
{
// For code with throw helper blocks, find and use the helper block for
// raising the exception. The block may be shared by other trees too.
BasicBlock* excpRaisingBlock;
if (failBlk != nullptr)
{
// We already know which block to jump to. Use that.
assert(failBlk->gtOper == GT_LABEL);
excpRaisingBlock = failBlk->gtLabel.gtLabBB;
#ifdef DEBUG
Compiler::AddCodeDsc* add =
compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
assert(excpRaisingBlock == add->acdDstBlk);
#if !FEATURE_FIXED_OUT_ARGS
assert(add->acdStkLvlInit || isFramePointerUsed());
#endif // !FEATURE_FIXED_OUT_ARGS
#endif // DEBUG
}
else
{
// Find the helper-block which raises the exception.
Compiler::AddCodeDsc* add =
compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
excpRaisingBlock = add->acdDstBlk;
#if !FEATURE_FIXED_OUT_ARGS
assert(add->acdStkLvlInit || isFramePointerUsed());
#endif // !FEATURE_FIXED_OUT_ARGS
}
noway_assert(excpRaisingBlock != nullptr);
// Jump to the exception-throwing block on error.
inst_JMP(jumpKind, excpRaisingBlock);
}
else
{
// The code to throw the exception will be generated inline, and
// we will jump around it in the normal non-exception case.
BasicBlock* tgtBlk = nullptr;
emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
if (reverseJumpKind != jumpKind)
{
tgtBlk = genCreateTempLabel();
inst_JMP(reverseJumpKind, tgtBlk);
}
genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
// Define the spot for the normal non-exception case to jump to.
if (tgtBlk != nullptr)
{
assert(reverseJumpKind != jumpKind);
genDefineTempLabel(tgtBlk);
}
}
}
/*****************************************************************************
*
* The last operation done was generating code for "tree" and that would
* have set the flags. Check if the operation caused an overflow.
*/
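// For example, on xarch a signed overflow operation jumps on EJ_jo (the overflow flag),
// while a GTF_UNSIGNED operation jumps on EJ_jb (the carry flag); in either case control
// transfers to the SCK_OVERFLOW throw path via genJumpToThrowHlpBlk().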
// inline
void CodeGen::genCheckOverflow(GenTree* tree)
{
// Overflow-check should be asked for this tree
noway_assert(tree->gtOverflow());
const var_types type = tree->TypeGet();
// Overflow checks can only occur for the non-small types (i.e. TYP_INT, TYP_LONG)
noway_assert(!varTypeIsSmall(type));
emitJumpKind jumpKind;
#ifdef _TARGET_ARM64_
if (tree->OperGet() == GT_MUL)
{
jumpKind = EJ_ne;
}
else
#endif
{
bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
#if defined(_TARGET_XARCH_)
jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
#elif defined(_TARGET_ARMARCH_)
jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
if (jumpKind == EJ_lo)
{
if (tree->OperGet() != GT_SUB)
{
jumpKind = EJ_hs;
}
}
#endif // defined(_TARGET_ARMARCH_)
}
// Jump to the block which will throw the exception
genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
}
#if FEATURE_EH_FUNCLETS
/*****************************************************************************
*
* Update the current funclet as needed by calling genUpdateCurrentFunclet().
* For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
* is up-to-date.
*
*/
void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
{
if (block->bbFlags & BBF_FUNCLET_BEG)
{
compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
{
assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
}
else
{
// We shouldn't see FUNC_ROOT
assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
}
}
else
{
assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
{
assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
}
else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
{
assert(!block->hasHndIndex());
}
else
{
assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
}
}
}
#if defined(_TARGET_ARM_)
void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
{
// If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
// so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
// calls the funclet during non-exceptional control flow.
if (block->bbFlags & BBF_FINALLY_TARGET)
{
assert(block->bbFlags & BBF_JMP_TARGET);
#ifdef DEBUG
if (compiler->verbose)
{
printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
}
#endif
// Create a label that we'll use for computing the start of an EH region, if this block is
// at the beginning of such a region. If we used the existing bbEmitCookie as is for
// determining the EH regions, then this NOP would end up outside of the region, if this
// block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
// would be executed, which we would prefer not to do.
block->bbUnwindNopEmitCookie =
getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
instGen(INS_nop);
}
}
#endif
#endif // FEATURE_EH_FUNCLETS
/*****************************************************************************
*
* Generate code for the function.
*/
void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
{
#ifdef DEBUG
if (verbose)
{
printf("*************** In genGenerateCode()\n");
compiler->fgDispBasicBlocks(compiler->verboseTrees);
}
#endif
unsigned codeSize;
unsigned prologSize;
unsigned epilogSize;
void* consPtr;
#ifdef DEBUG
genInterruptibleUsed = true;
#if STACK_PROBES
genNeedPrologStackProbe = false;
#endif
compiler->fgDebugCheckBBlist();
#endif // DEBUG
/* This is the real thing */
genPrepForCompiler();
/* Prepare the emitter */
getEmitter()->Init();
#ifdef DEBUG
VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
#endif
#ifdef DEBUG
if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
{
compiler->opts.disAsm = true;
}
if (compiler->opts.disAsm)
{
printf("; Assembly listing for method %s\n", compiler->info.compFullName);
printf("; Emitting ");
if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
{
printf("SMALL_CODE");
}
else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
{
printf("FAST_CODE");
}
else
{
printf("BLENDED_CODE");
}
printf(" for ");
if (compiler->info.genCPU == CPU_X86)
{
printf("generic X86 CPU");
}
else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
{
printf("Pentium 4");
}
else if (compiler->info.genCPU == CPU_X64)
{
if (compiler->canUseVexEncoding())
{
printf("X64 CPU with AVX");
}
else
{
printf("X64 CPU with SSE2");
}
}
else if (compiler->info.genCPU == CPU_ARM)
{
printf("generic ARM CPU");
}
printf("\n");
if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
{
printf("; optimized code\n");
}
else if (compiler->opts.compDbgCode)
{
printf("; debuggable code\n");
}
else if (compiler->opts.MinOpts())
{
printf("; compiler->opts.MinOpts() is true\n");
}
else
{
printf("; unknown optimization flags\n");
}
#if DOUBLE_ALIGN
if (compiler->genDoubleAlign())
printf("; double-aligned frame\n");
else
#endif
printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
if (genInterruptible)
{
printf("; fully interruptible\n");
}
else
{
printf("; partially interruptible\n");
}
if (compiler->fgHaveProfileData())
{
printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
}
if (compiler->fgProfileData_ILSizeMismatch)
{
printf("; discarded IBC profile data due to mismatch in ILSize\n");
}
}
#endif // DEBUG
// We compute the final frame layout before code generation. This is because LSRA
// has already computed exactly the maximum concurrent number of spill temps of each type that are
// required during code generation. So, there is nothing left to estimate: we can be precise in the frame
// layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
// memory from the VM.
genFinalizeFrame();
unsigned maxTmpSize = regSet.tmpGetTotalSize(); // This is precise after LSRA has pre-allocated the temps.
getEmitter()->emitBegFN(isFramePointerUsed()
#if defined(DEBUG)
,
(compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
#endif
,
maxTmpSize);
/* Now generate code for the function */
genCodeForBBlist();
#ifdef DEBUG
// After code generation, dump the frame layout again. It should be the same as before code generation, if code
// generation hasn't touched it (it shouldn't!).
if (verbose)
{
compiler->lvaTableDump();
}
#endif // DEBUG
/* We can now generate the function prolog and epilog */
genGeneratePrologsAndEpilogs();
/* Bind jump distances */
getEmitter()->emitJumpDistBind();
/* The code is now complete and final; it should not change after this. */
/* Compute the size of the code sections that we are going to ask the VM
to allocate. Note that this might not be precisely the size of the
code we emit, though it's fatal if we emit more code than the size we
compute here.
(Note: an example of a case where we emit less code would be useful.)
*/
getEmitter()->emitComputeCodeSizes();
#ifdef DEBUG
// Code to test or stress our ability to run a fallback compile.
// We trigger the fallback here, before asking the VM for any memory,
// because if not, we will leak mem, as the current codebase can't free
// the mem after the emitter asks the VM for it. As this is only a stress
// mode, we only want the functionality, and don't care about the relative
// ugliness of having the failure here.
if (!compiler->jitFallbackCompile)
{
// Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
// especially that caused by enabling JIT stress.
if (!JitConfig.JitNoForceFallback())
{
if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
{
NO_WAY_NOASSERT("Stress failure");
}
}
}
#endif // DEBUG
/* We've finished collecting all the unwind information for the function. Now reserve
space for it from the VM.
*/
compiler->unwindReserve();
#if DISPLAY_SIZES
size_t dataSize = getEmitter()->emitDataSize();
#endif // DISPLAY_SIZES
void* coldCodePtr;
bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
trackedStackPtrsContig = false;
#elif defined(_TARGET_ARM_)
// On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
#else
trackedStackPtrsContig = !compiler->opts.compDbgEnC;
#endif
#ifdef DEBUG
/* We're done generating code for this function */
compiler->compCodeGenDone = true;
#endif
compiler->EndPhase(PHASE_GENERATE_CODE);
codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
(compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
&prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
compiler->EndPhase(PHASE_EMIT_CODE);
#ifdef DEBUG
if (compiler->opts.disAsm)
{
printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
compiler->info.compFullName);
printf("; ============================================================\n");
printf(""); // in our logic this causes a flush
}
if (verbose)
{
printf("*************** After end code gen, before unwindEmit()\n");
getEmitter()->emitDispIGlist(true);
}
#endif
#if EMIT_TRACK_STACK_DEPTH
/* Check our max stack level. Needed for fgAddCodeRef().
We need to relax the assert as our estimation won't include code-gen
stack changes (which we know don't affect fgAddCodeRef()) */
{
unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
compiler->compHndBBtabCount + // Return address for locally-called finallys
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
(compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
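// Worked example (illustrative, assuming an x86 target where genTypeStSz(TYP_LONG) == 2): a method
// with fgPtrArgCntMax == 3, one EH table entry, and no tail calls allows 3 + 1 + 2 + 0 = 6 slots.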
#if defined(UNIX_X86_ABI)
maxAllowedStackDepth += maxNestedAlignment;
#endif
noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
}
#endif // EMIT_TRACK_STACK_DEPTH
*nativeSizeOfCode = codeSize;
compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
// printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
// Make sure that the x86 alignment and cache prefetch optimization rules
// were obeyed.
// Don't start a method in the last 7 bytes of a 16-byte alignment area
// unless we are generating SMALL_CODE
// noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
/* Now that the code is issued, we can finalize and emit the unwind data */
compiler->unwindEmit(*codePtr, coldCodePtr);
/* Finalize the line # tracking logic after we know the exact block sizes/offsets */
genIPmappingGen();
/* Finalize the Local Var info in terms of generated code */
genSetScopeInfo();
#ifdef LATE_DISASM
unsigned finalHotCodeSize;
unsigned finalColdCodeSize;
if (compiler->fgFirstColdBlock != nullptr)
{
// We did some hot/cold splitting. The hot section is always padded out to the
// size we thought it would be, but the cold section is not.
assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
assert(compiler->info.compTotalHotCodeSize > 0);
assert(compiler->info.compTotalColdCodeSize > 0);
finalHotCodeSize = compiler->info.compTotalHotCodeSize;
finalColdCodeSize = codeSize - finalHotCodeSize;
}
else
{
// No hot/cold splitting
assert(codeSize <= compiler->info.compTotalHotCodeSize);
assert(compiler->info.compTotalHotCodeSize > 0);
assert(compiler->info.compTotalColdCodeSize == 0);
finalHotCodeSize = codeSize;
finalColdCodeSize = 0;
}
getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
#endif // LATE_DISASM
/* Report any exception handlers to the VM */
genReportEH();
#ifdef JIT32_GCENCODER
#ifdef DEBUG
void* infoPtr =
#endif // DEBUG
#endif
// Create and store the GC info for this method.
genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
#ifdef DEBUG
FILE* dmpf = jitstdout;
compiler->opts.dmpHex = false;
if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for>"))
{
FILE* codf;
errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
if (ec == 0) // fopen_s returns 0 on success; only use the file if it was actually opened
{
assert(codf);
dmpf = codf;
compiler->opts.dmpHex = true;
}
}
if (compiler->opts.dmpHex)
{
size_t consSize = getEmitter()->emitDataSize();
size_t infoSize = compiler->compInfoBlkSize;
fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
fprintf(dmpf, "\n");
if (codeSize)
{
fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
}
if (consSize)
{
fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
}
#ifdef JIT32_GCENCODER
if (infoSize)
fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
#endif // JIT32_GCENCODER
fprintf(dmpf, "\n");
if (codeSize)
{
hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
}
if (consSize)
{
hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
}
#ifdef JIT32_GCENCODER
if (infoSize)
hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
#endif // JIT32_GCENCODER
fflush(dmpf);
}
if (dmpf != jitstdout)
{
fclose(dmpf);
}
#endif // DEBUG
/* Tell the emitter that we're done with this function */
getEmitter()->emitEndFN();
/* Shut down the spill logic */
regSet.rsSpillDone();
/* Shut down the temp logic */
regSet.tmpDone();
#if DISPLAY_SIZES
grossVMsize += compiler->info.compILCodeSize;
totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
grossNCsize += codeSize + dataSize;
#endif // DISPLAY_SIZES
compiler->EndPhase(PHASE_EMIT_GCEH);
}
/*****************************************************************************
*
* Report EH clauses to the VM
*/
void CodeGen::genReportEH()
{
if (compiler->compHndBBtabCount == 0)
{
return;
}
#ifdef DEBUG
if (compiler->opts.dspEHTable)
{
printf("*************** EH table for %s\n", compiler->info.compFullName);
}
#endif // DEBUG
unsigned XTnum;
EHblkDsc* HBtab;
EHblkDsc* HBtabEnd;
bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
unsigned EHCount = compiler->compHndBBtabCount;
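// EHCount starts as the number of EH table entries; below, it grows by the duplicated clauses
// needed for out-of-line funclets and (with call-finally thunks) by the cloned finally clauses.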
#if FEATURE_EH_FUNCLETS
// Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
// VM.
unsigned duplicateClauseCount = 0;
unsigned enclosingTryIndex;
// Duplicate clauses are not used by CoreRT ABI
if (!isCoreRTABI)
{
for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
{
for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
// ignoring 'mutual protect' trys
enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
{
++duplicateClauseCount;
}
}
EHCount += duplicateClauseCount;
}
#if FEATURE_EH_CALLFINALLY_THUNKS
unsigned clonedFinallyCount = 0;
// Duplicate clauses are not used by CoreRT ABI
if (!isCoreRTABI)
{
// We don't keep track of how many cloned finallys there are. So, go through and count.
// We do a quick pass first through the EH table to see if there are any try/finally
// clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
bool anyFinallys = false;
for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
HBtab < HBtabEnd; HBtab++)
{
if (HBtab->HasFinallyHandler())
{
anyFinallys = true;
break;
}
}
if (anyFinallys)
{
for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
{
if (block->bbJumpKind == BBJ_CALLFINALLY)
{
++clonedFinallyCount;
}
}
EHCount += clonedFinallyCount;
}
}
#endif // FEATURE_EH_CALLFINALLY_THUNKS
#endif // FEATURE_EH_FUNCLETS
#ifdef DEBUG
if (compiler->opts.dspEHTable)
{
#if FEATURE_EH_FUNCLETS
#if FEATURE_EH_CALLFINALLY_THUNKS
printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
#else // !FEATURE_EH_CALLFINALLY_THUNKS
printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
#endif // !FEATURE_EH_CALLFINALLY_THUNKS
#else // !FEATURE_EH_FUNCLETS
printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
assert(compiler->compHndBBtabCount == EHCount);
#endif // !FEATURE_EH_FUNCLETS
}
#endif // DEBUG
// Tell the VM how many EH clauses to expect.
compiler->eeSetEHcount(EHCount);
XTnum = 0; // This is the index we pass to the VM
for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
HBtab < HBtabEnd; HBtab++)
{
UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
: compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
: compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
if (HBtab->HasFilter())
{
hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
}
else
{
hndTyp = HBtab->ebdTyp;
}
CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
if (isCoreRTABI && (XTnum > 0))
{
// For CoreRT, the CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers the same
// try block as the previous one. The runtime cannot reliably infer this information from
// native code offsets, because different try blocks can have the same offsets. An alternative
// solution to this problem would be to insert extra nops to ensure that different try
// blocks have different offsets.
if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
{
// The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
// allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
// IL as "try { try {} catch {} catch {} } finally {}".
assert(HBtab->HasCatchHandler());
flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
}
}
// Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
// the fields aren't accurate.
CORINFO_EH_CLAUSE clause;
clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
clause.Flags = flags;
clause.TryOffset = tryBeg;
clause.TryLength = tryEnd;
clause.HandlerOffset = hndBeg;
clause.HandlerLength = hndEnd;
assert(XTnum < EHCount);
// Tell the VM about this EH clause.
compiler->eeSetEHinfo(XTnum, &clause);
++XTnum;
}
#if FEATURE_EH_FUNCLETS
// Now output duplicated clauses.
//
// If a funclet has been created by moving a handler out of a try region that it was originally nested
// within, then we need to report a "duplicate" clause representing the fact that an exception in that
// handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
// descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
// no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
// region as the enclosing try region's handler region. This is the sense in which it is duplicated:
// there is now a "duplicate" clause with the same handler region as another, but a different 'try'
// region.
//
// For example, consider this (capital letters represent an unknown code sequence, numbers identify a
// try or handler region):
//
// A
// try (1) {
// B
// try (2) {
// C
// } catch (3) {
// D
// } catch (4) {
// E
// }
// F
// } catch (5) {
// G
// }
// H
//
// Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
// by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
// This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
// to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
// note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
// The code "D" and "E" won't be contiguous with the protected region for try (1) (which
// will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
// representing try (1) protecting the new funclets catch (3) and (4).
// The code will be generated as follows:
//
// ABCFH // "main" code
// D // funclet
// E // funclet
// G // funclet
//
// The EH regions are:
//
// C -> D
// C -> E
// BCF -> G
// D -> G // "duplicate" clause
// E -> G // "duplicate" clause
//
// Note that we actually need to generate one of these additional "duplicate" clauses for every
// region the funclet is nested in. Take this example:
//
// A
// try (1) {
// B
// try (2,3) {
// C
// try (4) {
// D
// try (5,6) {
// E
// } catch {
// F
// } catch {
// G
// }
// H
// } catch {
// I
// }
// J
// } catch {
// K
// } catch {
// L
// }
// M
// } catch {
// N
// }
// O
//
// When we pull out funclets, we get the following generated code:
//
// ABCDEHJMO // "main" function
// F // funclet
// G // funclet
// I // funclet
// K // funclet
// L // funclet
// N // funclet
//
// And the EH regions we report to the VM (in order: main clauses
// first, in most-to-least nested order; funclets ("duplicated clauses")
// last, also in most-to-least nested order) are:
//
// E -> F
// E -> G
// DEH -> I
// CDEHJ -> K
// CDEHJ -> L
// BCDEHJM -> N
// F -> I // funclet clause #1 for F
// F -> K // funclet clause #2 for F
// F -> L // funclet clause #3 for F
// F -> N // funclet clause #4 for F
// G -> I // funclet clause #1 for G
// G -> K // funclet clause #2 for G
// G -> L // funclet clause #3 for G
// G -> N // funclet clause #4 for G
// I -> K // funclet clause #1 for I
// I -> L // funclet clause #2 for I
// I -> N // funclet clause #3 for I
// K -> N // funclet clause #1 for K
// L -> N // funclet clause #1 for L
//
// So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
// Note that due to the nature of 'mutually protect' clauses, it would be incorrect
// to add a clause "F -> G" because F is NOT protected by G, but we still have
// both "F -> K" and "F -> L" because F IS protected by both of those handlers.
//
// The overall ordering of the clauses is unchanged: most-to-least nesting, then
// front-to-back start offset. Because we place the funclets at the end,
// these new clauses should also go at the end by this ordering.
//
if (duplicateClauseCount > 0)
{
unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
unsigned XTnum2;
for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
{
unsigned enclosingTryIndex;
EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
// ignoring 'mutual protect' trys
enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
{
// The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
// that will have the enclosing try protecting the funclet.
noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
// greater EH table index
EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
// The try region is the handler of the funclet. Note that for filters, we don't protect the
// filter region, only the filter handler region. This is because exceptions in filters never
// escape; the VM swallows them.
BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
BasicBlock* bbTryLast = fletTab->ebdHndLast;
BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
BasicBlock* bbHndLast = encTab->ebdHndLast;
UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
tryBeg = compiler->ehCodeOffset(bbTryBeg);
hndBeg = compiler->ehCodeOffset(bbHndBeg);
tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
: compiler->ehCodeOffset(bbTryLast->bbNext);
hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
: compiler->ehCodeOffset(bbHndLast->bbNext);
if (encTab->HasFilter())
{
hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
}
else
{
hndTyp = encTab->ebdTyp;
}
CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
// Tell the VM this is an extra clause caused by moving funclets out of line.
flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
// Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
// the fields aren't really accurate. For example, we set "TryLength" to the offset of the
// instruction immediately after the 'try' body. So, it really could be more accurately named
// "TryEndOffset".
CORINFO_EH_CLAUSE clause;
clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
clause.Flags = flags;
clause.TryOffset = tryBeg;
clause.TryLength = tryEnd;
clause.HandlerOffset = hndBeg;
clause.HandlerLength = hndEnd;
assert(XTnum < EHCount);
// Tell the VM about this EH clause (a duplicated clause).
compiler->eeSetEHinfo(XTnum, &clause);
++XTnum;
++reportedDuplicateClauseCount;
#ifndef DEBUG
if (duplicateClauseCount == reportedDuplicateClauseCount)
{
break; // we've reported all of them; no need to continue looking
}
#endif // !DEBUG
} // for each 'true' enclosing 'try'
} // for each EH table entry
assert(duplicateClauseCount == reportedDuplicateClauseCount);
} // if (duplicateClauseCount > 0)
#if FEATURE_EH_CALLFINALLY_THUNKS
if (clonedFinallyCount > 0)
{
unsigned reportedClonedFinallyCount = 0;
for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
{
if (block->bbJumpKind == BBJ_CALLFINALLY)
{
UNATIVE_OFFSET hndBeg, hndEnd;
hndBeg = compiler->ehCodeOffset(block);
// How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
// a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
BasicBlock* bbLabel = block->bbNext;
if (block->isBBCallAlwaysPair())
{
bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
}
if (bbLabel == nullptr)
{
hndEnd = compiler->info.compNativeCodeSize;
}
else
{
assert(bbLabel->bbEmitCookie != nullptr);
hndEnd = compiler->ehCodeOffset(bbLabel);
}
CORINFO_EH_CLAUSE clause;
clause.ClassToken = 0; // unused
clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
clause.TryOffset = hndBeg;
clause.TryLength = hndBeg;
clause.HandlerOffset = hndBeg;
clause.HandlerLength = hndEnd;
assert(XTnum < EHCount);
// Tell the VM about this EH clause (a cloned finally clause).
compiler->eeSetEHinfo(XTnum, &clause);
++XTnum;
++reportedClonedFinallyCount;
#ifndef DEBUG
if (clonedFinallyCount == reportedClonedFinallyCount)
{
break; // we're done; no need to keep looking
}
#endif // !DEBUG
} // block is BBJ_CALLFINALLY
} // for each block
assert(clonedFinallyCount == reportedClonedFinallyCount);
} // if (clonedFinallyCount > 0)
#endif // FEATURE_EH_CALLFINALLY_THUNKS
#endif // FEATURE_EH_FUNCLETS
assert(XTnum == EHCount);
}
//----------------------------------------------------------------------
// genUseOptimizedWriteBarriers: Determine if an optimized write barrier
// helper should be used.
//
// Arguments:
// wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
//
// Return Value:
// true if an optimized write barrier helper should be used, false otherwise.
// Note: only x86 implements register-specific source optimized write
// barriers currently.
//
bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
{
#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
#ifdef DEBUG
return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
#else
return true;
#endif
#else
return false;
#endif
}
//----------------------------------------------------------------------
// genUseOptimizedWriteBarriers: Determine if an optimized write barrier
// helper should be used.
//
// This has the same functionality as the version of
// genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
// determining what the required write barrier form is, if possible.
//
// Arguments:
// tgt - target tree of write (e.g., GT_STOREIND)
// assignVal - tree with value to write
//
// Return Value:
// true if an optimized write barrier helper should be used, false otherwise.
// Note: only x86 implements register-specific source optimized write
// barriers currently.
//
bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal)
{
#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
#ifdef DEBUG
GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
#else
return true;
#endif
#else
return false;
#endif
}
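// Usage sketch (illustrative only; the target-specific store codegen is authoritative):
//
//   GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, data);
//   if (wbf != GCInfo::WBF_NoBarrier)
//   {
//       if (genUseOptimizedWriteBarriers(wbf))
//       {
//           // x86 only: call one of the register-specific NOGC write barrier helpers,
//           // chosen by the register that holds the value being written.
//       }
//       else
//       {
//           genGCWriteBarrier(tgt, wbf);
//       }
//   }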
//----------------------------------------------------------------------
// genWriteBarrierHelperForWriteBarrierForm: Given a write node requiring a write
// barrier, and the write barrier form required, determine the helper to call.
//
// Arguments:
// tgt - target tree of write (e.g., GT_STOREIND)
// wbf - already computed write barrier form to use
//
// Return Value:
// Write barrier helper to use.
//
// Note: do not call this function to get an optimized write barrier helper (e.g.,
// for x86).
//
CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
{
noway_assert(tgt->gtOper == GT_STOREIND);
CorInfoHelpFunc helper = CORINFO_HELP_ASSIGN_REF;
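// Summary of the selection below:
//   DEBUG-only WBF_NoBarrier_CheckNotHeapInDebug                 -> CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP
//   otherwise, unless the form is WBF_BarrierUnchecked, a
//   GTF_IND_TGTANYWHERE target or a TYP_I_IMPL address           -> CORINFO_HELP_CHECKED_ASSIGN_REF
//   everything else                                              -> CORINFO_HELP_ASSIGN_REF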
#ifdef DEBUG
if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
{
helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
}
else
#endif
if (tgt->gtOper != GT_CLS_VAR)
{
if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
{
if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
{
helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
}
else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
{
helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
}
}
}
assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
(wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
((helper == CORINFO_HELP_ASSIGN_REF) &&
(wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
return helper;
}
//----------------------------------------------------------------------
// genGCWriteBarrier: Generate a write barrier for a node.
//
// Arguments:
// tgt - target tree of write (e.g., GT_STOREIND)
// wbf - already computed write barrier form to use
//
void CodeGen::genGCWriteBarrier(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
{
CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(tgt, wbf);
#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
// We classify the "tgt" trees as follows:
// If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
// IND [-> ADDR -> IND] -> { GT_LCL_VAR, GT_REG_VAR, ADD({GT_LCL_VAR, GT_REG_VAR}, X), ADD(X, (GT_LCL_VAR,
// GT_REG_VAR)) }
// then let "v" be the GT_LCL_VAR or GT_REG_VAR.
// * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
// * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
// * Otherwise, classify as CWBKind_OtherByRefLocal.
// If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, clasify as CWBKind_AddrOfLocal.
// Otherwise, classify as CWBKind_Unclassified.
CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
if (tgt->gtOper == GT_IND)
{
GenTree* lcl = NULL;
GenTree* indArg = tgt->gtOp.gtOp1;
if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
{
indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
}
if (indArg->gtOper == GT_LCL_VAR || indArg->gtOper == GT_REG_VAR)
{
lcl = indArg;
}
else if (indArg->gtOper == GT_ADD)
{
if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp1->gtOper == GT_REG_VAR)
{
lcl = indArg->gtOp.gtOp1;
}
else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp2->gtOper == GT_REG_VAR)
{
lcl = indArg->gtOp.gtOp2;
}
}
if (lcl != NULL)
{
wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
unsigned lclNum = 0;
if (lcl->gtOper == GT_LCL_VAR)
lclNum = lcl->gtLclVarCommon.gtLclNum;
else
{
assert(lcl->gtOper == GT_REG_VAR);
lclNum = lcl->gtRegVar.gtLclNum;
}
if (lclNum == compiler->info.compRetBuffArg)
{
wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
}
else
{
LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
{
wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
}
}
}
else
{
// We should have eliminated the barrier for this case.
assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
}
}
if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
{
#if 0
#ifdef DEBUG
// Enable this to sample the unclassified trees.
static int unclassifiedBarrierSite = 0;
if (wbKind == CWBKind_Unclassified)
{
unclassifiedBarrierSite++;
printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
}
#endif // DEBUG
#endif // 0
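// Push the classification as an extra 4-byte argument to the counting helper, keeping the tracked
// stack level in sync around the push and call.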
AddStackLevel(4);
inst_IV(INS_push, wbKind);
genEmitHelperCall(helper,
4, // argSize
EA_PTRSIZE); // retSize
SubtractStackLevel(4);
}
else
{
genEmitHelperCall(helper,
0, // argSize
EA_PTRSIZE); // retSize
}
#else // !FEATURE_COUNT_GC_WRITE_BARRIERS
genEmitHelperCall(helper,
0, // argSize
EA_PTRSIZE); // retSize
#endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
}
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX XX
XX Prolog / Epilog XX
XX XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
/*****************************************************************************
*
* Generates code for moving incoming register arguments to their
* assigned location, in the function prolog.
*/
#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
{
#ifdef DEBUG
if (verbose)
{
printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
}
#endif
unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
unsigned argNum; // current argNum, always in [0..argMax-1]
unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
unsigned regArgNum; // index into the regArgTab[] table
regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
bool doingFloat = regState->rsIsFloat;
// We should be generating the prolog block when we are called
assert(compiler->compGeneratingProlog);
// We expect to have some registers of the type we are doing that are live on entry; otherwise, this method need not be called.
noway_assert(regArgMaskLive != 0);
// If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
// If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid indexes are 0-8
//
// The regArgTab can always have unused entries,
// for example if an architecture always increments the arg register number but uses either
// an integer register or a floating point register to hold the next argument
// then with a mix of float and integer args you could have:
//
// sampleMethod(int i, float x, int j, float y, int k, float z);
// r0, r2 and r4 as valid integer arguments with argMax as 5
// and f1, f3 and f5 as valid floating point arguments with argMax as 6
// The first one is doingFloat==false and the second one is doingFloat==true
//
// If a fixed return buffer (in r8) was also present then the first one would become:
// r0, r2, r4 and r8 as valid integer arguments with argMax as 9
//
argMax = regState->rsCalleeRegArgCount;
fixedRetBufIndex = (unsigned)-1; // Invalid value
// If necessary we will select a correct xtraReg for circular floating point args later.
if (doingFloat)
{
xtraReg = REG_NA;
noway_assert(argMax <= MAX_FLOAT_REG_ARG);
}
else // we are doing the integer registers
{
noway_assert(argMax <= MAX_REG_ARG);
if (hasFixedRetBuffReg())
{
fixedRetBufIndex = theFixedRetBuffArgNum();
// We have an additional integer register argument when hasFixedRetBuffReg() is true
argMax = fixedRetBufIndex + 1;
assert(argMax == (MAX_REG_ARG + 1));
}
}
//
// Construct a table with the register arguments, for detecting circular and
// non-circular dependencies between the register arguments. A dependency is when
// an argument register Rn needs to be moved to register Rm that is also an argument
// register. The table is constructed in the order the arguments are passed in
// registers: the first register argument is in regArgTab[0], the second in
// regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
// at an even index. The regArgTab is indexed from 0 to argMax - 1.
// Note that due to an extra argument register for ARM64 (i.e. theFixedRetBuffReg())
// we have increased the allocated size of the regArgTab[] by one.
//
struct regArgElem
{
unsigned varNum; // index into compiler->lvaTable[] for this register argument
#if defined(UNIX_AMD64_ABI)
var_types type; // the Jit type of this regArgTab entry
#endif // defined(UNIX_AMD64_ABI)
unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
// That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
// argument register number 'x'. Only used when circular = true.
char slot; // 0 means the register is not used for a register argument
// 1 means the first part of a register argument
// 2, 3 or 4 means the second,third or fourth part of a multireg argument
bool stackArg; // true if the argument gets homed to the stack
bool processed; // true after we've processed the argument (and it is in its final location)
bool circular; // true if this register participates in a circular dependency loop.
#ifdef UNIX_AMD64_ABI
// For UNIX AMD64 struct passing, the type of the register argument slot can differ from
// the type of the lclVar in ways that are not ascertainable from lvType.
// So, for that case we retain the type of the register in the regArgTab.
var_types getRegType(Compiler* compiler)
{
return type; // UNIX_AMD64 implementation
}
#else // !UNIX_AMD64_ABI
// In other cases, we simply use the type of the lclVar to determine the type of the register.
var_types getRegType(Compiler* compiler)
{
const LclVarDsc& varDsc = compiler->lvaTable[varNum];
// Check if this is an HFA register arg and return the HFA type
if (varDsc.lvIsHfaRegArg())
{
#if defined(_TARGET_WINDOWS_)
// Cannot have hfa types on windows arm targets
// in vararg methods.
assert(!compiler->info.compIsVarArgs);
#endif // defined(_TARGET_WINDOWS_)
return varDsc.GetHfaType();
}
return compiler->mangleVarArgsType(varDsc.lvType);
}
#endif // !UNIX_AMD64_ABI
} regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
unsigned varNum;
LclVarDsc* varDsc;
for (varNum = 0; varNum < compiler->lvaCount; ++varNum)
{
varDsc = compiler->lvaTable + varNum;
// Is this variable a register arg?
if (!varDsc->lvIsParam)
{
continue;
}
if (!varDsc->lvIsRegArg)
{
continue;
}
// When we have a promoted struct we have two possible LclVars that can represent the incoming argument
// in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
// We will use the lvStructField if we have a PROMOTION_TYPE_INDEPENDENT promoted struct field;
// otherwise we use the original TYP_STRUCT argument.
//
if (varDsc->lvPromoted || varDsc->lvIsStructField)
{
LclVarDsc* parentVarDsc = varDsc;
if (varDsc->lvIsStructField)
{
assert(!varDsc->lvPromoted);
parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
}
Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
{
noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
// For register arguments that are independent promoted structs we put the promoted field varNum in the
// regArgTab[]
if (varDsc->lvPromoted)
{
continue;
}
}
else
{
// For register arguments that are not independent promoted structs we put the parent struct varNum in
// the regArgTab[]
if (varDsc->lvIsStructField)
{
continue;
}
}
}
var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
// Change regType to the HFA type when we have a HFA argument
if (varDsc->lvIsHfaRegArg())
{
#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
if (compiler->info.compIsVarArgs)
{
assert(!"Illegal incoming HFA arg encountered in Vararg method.");
}
#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
regType = varDsc->GetHfaType();
}
#if defined(UNIX_AMD64_ABI)
if (!varTypeIsStruct(regType))
#endif // defined(UNIX_AMD64_ABI)
{
// A struct might be passed partially in XMM register for System V calls.
// So a single arg might use both register files.
if (isFloatRegType(regType) != doingFloat)
{
continue;
}
}
int slots = 0;
#if defined(UNIX_AMD64_ABI)
if (varTypeIsStruct(varDsc))
{
CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
assert(typeHnd != nullptr);
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
if (!structDesc.passedInRegisters)
{
// The var is not passed in registers.
continue;
}
unsigned firstRegSlot = 0;
for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
{
regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
var_types regType;
#ifdef FEATURE_SIMD
// Assumption 1:
// The RyuJit backend depends on the assumption that on 64-bit targets the Vector3 size is rounded
// up to a multiple of TARGET_POINTER_SIZE, and hence Vector3 locals on the stack can be treated as
// TYP_SIMD16 for reading and writing purposes. Hence, while homing a Vector3 type arg on the stack,
// we should home the entire 16 bytes so that the upper-most 4 bytes will be zeroed when written to
// the stack.
//
// Assumption 2:
// The RyuJit backend makes another implicit assumption: when Vector3 type args are passed in
// registers or on the stack, the upper-most 4 bytes will be zero.
//
// For P/Invoke returns and Reverse P/Invoke argument passing, the native compiler doesn't guarantee
// that the upper 4 bytes of a Vector3 type struct are zero-initialized, and hence assumption 2 is
// invalid.
//
// RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are
// read/written. In the case of Vector3 returns, the caller allocates a zero-initialized Vector3
// local and passes it as the retBuf arg, and the callee writes only 12 bytes to retBuf. For this
// reason, there is no need to clear the upper 4 bytes of Vector3 type args.
//
// RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
// Vector3 return values are returned in two return registers, and the caller assembles them into a
// single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
// type args in the prolog and of the Vector3 type return value of a call.
if (varDsc->lvType == TYP_SIMD12)
{
regType = TYP_DOUBLE;
}
else
#endif
{
regType = compiler->GetEightByteType(structDesc, slotCounter);
}
regArgNum = genMapRegNumToRegArgNum(regNum, regType);
if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
(doingFloat && (structDesc.IsSseSlot(slotCounter))))
{
// Store the reg for the first slot.
if (slots == 0)
{
firstRegSlot = regArgNum;
}
// Bingo - add it to our table
noway_assert(regArgNum < argMax);
noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
// not be multiple vars representing this argument
// register)
regArgTab[regArgNum].varNum = varNum;
regArgTab[regArgNum].slot = (char)(slotCounter + 1);
regArgTab[regArgNum].type = regType;
slots++;
}
}
if (slots == 0)
{
continue; // Nothing to do for this regState set.
}
regArgNum = firstRegSlot;
}
else
#endif // defined(UNIX_AMD64_ABI)
{
// Bingo - add it to our table
regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
noway_assert(regArgNum < argMax);
// We better not have added it already (there better not be multiple vars representing this argument
// register)
noway_assert(regArgTab[regArgNum].slot == 0);
#if defined(UNIX_AMD64_ABI)
// Set the register type.
regArgTab[regArgNum].type = regType;
#endif // defined(UNIX_AMD64_ABI)
regArgTab[regArgNum].varNum = varNum;
regArgTab[regArgNum].slot = 1;
slots = 1;
#if FEATURE_MULTIREG_ARGS
if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
{
if (varDsc->lvIsHfaRegArg())
{
// We have an HFA argument, set slots to the number of registers used
slots = varDsc->lvHfaSlots();
}
else
{
// Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
// We have a non-HFA multireg argument, set slots to two
slots = 2;
}
// Note that regArgNum+1 represents an argument index not an actual argument register.
// see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
// This is the setup for the rest of a multireg struct arg
for (int i = 1; i < slots; i++)
{
noway_assert((regArgNum + i) < argMax);
// We better not have added it already (there better not be multiple vars representing this argument
// register)
noway_assert(regArgTab[regArgNum + i].slot == 0);
regArgTab[regArgNum + i].varNum = varNum;
regArgTab[regArgNum + i].slot = (char)(i + 1);
}
}
#endif // FEATURE_MULTIREG_ARGS
}
#ifdef _TARGET_ARM_
int lclSize = compiler->lvaLclSize(varNum);
if (lclSize > REGSIZE_BYTES)
{
unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
slots = lclSize / REGSIZE_BYTES;
if (regArgNum + slots > maxRegArgNum)
{
slots = maxRegArgNum - regArgNum;
}
}
C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
assert(slots < INT8_MAX);
for (char i = 1; i < slots; i++)
{
regArgTab[regArgNum + i].varNum = varNum;
regArgTab[regArgNum + i].slot = i + 1;
}
#endif // _TARGET_ARM_
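// Now record liveness and dependency information for each register slot occupied by this argument.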
for (int i = 0; i < slots; i++)
{
regType = regArgTab[regArgNum + i].getRegType(compiler);
regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
#if !defined(UNIX_AMD64_ABI)
assert((i > 0) || (regNum == varDsc->lvArgReg));
#endif // !defined(UNIX_AMD64_ABI)
// Is the arg dead on entry to the method ?
if ((regArgMaskLive & genRegMask(regNum)) == 0)
{
if (varDsc->lvTrackedNonStruct())
{
noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
}
else
{
#ifdef _TARGET_X86_
noway_assert(varDsc->lvType == TYP_STRUCT);
#else // !_TARGET_X86_
// For LSRA, it may not be in regArgMaskLive if it has a zero
// refcnt. This is in contrast with the non-LSRA case in which all
// non-tracked args are assumed live on entry.
noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) ||
(varDsc->lvAddrExposed && compiler->info.compIsVarArgs) ||
(varDsc->lvAddrExposed && compiler->opts.compUseSoftFP));
#endif // !_TARGET_X86_
}
// Mark it as processed and be done with it
regArgTab[regArgNum + i].processed = true;
goto NON_DEP;
}
#ifdef _TARGET_ARM_
// On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
// could be equal to lvArgReg. The pre-spilled registers are also not considered live either since
// they've already been spilled.
//
if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
#endif // _TARGET_ARM_
{
#if !defined(UNIX_AMD64_ABI)
noway_assert(xtraReg != (varDsc->lvArgReg + i));
#endif
noway_assert(regArgMaskLive & genRegMask(regNum));
}
regArgTab[regArgNum + i].processed = false;
/* mark stack arguments since we will take care of those first */
regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
/* If it goes on the stack or in a register that doesn't hold
* an argument anymore -> CANNOT form a circular dependency */
if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
{
/* will trash another argument -> possible dependency
* We may need several passes after the table is constructed
* to decide on that */
/* Maybe the argument stays in the register (IDEAL) */
if ((i == 0) && (varDsc->lvRegNum == regNum))
{
goto NON_DEP;
}
#if !defined(_TARGET_64BIT_)
if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
{
goto NON_DEP;
}
if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
{
goto NON_DEP;
}
if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
(REG_NEXT(varDsc->lvRegNum) == regNum))
{
goto NON_DEP;
}
#endif // !defined(_TARGET_64BIT_)
regArgTab[regArgNum + i].circular = true;
}
else
{
NON_DEP:
regArgTab[regArgNum + i].circular = false;
/* mark the argument register as free */
regArgMaskLive &= ~genRegMask(regNum);
}
}
}
/* Find the circular dependencies for the argument registers, if any.
* A circular dependency is a set of registers R1, R2, ..., Rn
* such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
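// For example, if argument 'a' arrives in r0 but is allocated to r1, while argument 'b' arrives in
// r1 but is allocated to r0, then r0->r1 and r1->r0 form a cycle of length two that cannot be
// resolved by simple moves alone.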
bool change = true;
if (regArgMaskLive)
{
/* Possible circular dependencies still exist; the previous pass was not enough
* to filter them out. Use a "sieve" strategy to find all circular dependencies. */
while (change)
{
change = false;
for (argNum = 0; argNum < argMax; argNum++)
{
// If we already marked the argument as non-circular then continue
if (!regArgTab[argNum].circular)
{
continue;
}
if (regArgTab[argNum].slot == 0) // Not a register argument
{
continue;
}
varNum = regArgTab[argNum].varNum;
noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
/* cannot possibly have stack arguments */
noway_assert(varDsc->lvIsInReg());
noway_assert(!regArgTab[argNum].stackArg);
var_types regType = regArgTab[argNum].getRegType(compiler);
regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
regNumber destRegNum = REG_NA;
if (regArgTab[argNum].slot == 1)
{
destRegNum = varDsc->lvRegNum;
}
#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_)
else
{
assert(regArgTab[argNum].slot == 2);
assert(argNum > 0);
assert(regArgTab[argNum - 1].slot == 1);
assert(regArgTab[argNum - 1].varNum == varNum);
assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
regArgMaskLive &= ~genRegMask(regNum);
regArgTab[argNum].circular = false;
change = true;
continue;
}
#elif !defined(_TARGET_64BIT_)
else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
{
destRegNum = varDsc->lvOtherReg;
}
else
{
assert(regArgTab[argNum].slot == 2);
assert(varDsc->TypeGet() == TYP_DOUBLE);
destRegNum = REG_NEXT(varDsc->lvRegNum);
}
#endif // !defined(_TARGET_64BIT_)
noway_assert(destRegNum != REG_NA);
if (genRegMask(destRegNum) & regArgMaskLive)
{
/* we are trashing a live argument register - record it */
unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
noway_assert(destRegArgNum < argMax);
regArgTab[destRegArgNum].trashBy = argNum;
}
else
{
/* argument goes to a free register */
regArgTab[argNum].circular = false;
change = true;
/* mark the argument register as free */
regArgMaskLive &= ~genRegMask(regNum);
}
}
}
}
/* At this point, everything that has the "circular" flag
* set to "true" forms a circular dependency */
CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (regArgMaskLive)
{
if (verbose)
{
printf("Circular dependencies found while home-ing the incoming arguments.\n");
}
}
#endif
// LSRA allocates registers to incoming parameters in order and will not overwrite
// a register still holding a live parameter.
noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
"Homing of float argument registers with circular dependencies not implemented.");
/* Now move the arguments to their locations.
* First consider ones that go on the stack since they may
* free some registers. */
regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
for (argNum = 0; argNum < argMax; argNum++)