// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX XX
XX Morph XX
XX XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "allocacheck.h" // for alloca
// Convert the given node into a call to the specified helper, passing
// the given argument list.
//
// Tries to fold constants and also adds an edge for the overflow exception;
// returns the morphed tree.
GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper)
{
GenTree* result;
/* If the operand is a constant, we'll try to fold it */
if (oper->OperIsConst())
{
GenTree* oldTree = tree;
tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
if (tree != oldTree)
{
return fgMorphTree(tree);
}
else if (tree->OperKind() & GTK_CONST)
{
return fgMorphConst(tree);
}
// assert that oper is unchanged and that it is still a GT_CAST node
noway_assert(tree->gtCast.CastOp() == oper);
noway_assert(tree->gtOper == GT_CAST);
}
result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
assert(result == tree);
return result;
}
/*****************************************************************************
*
* Convert the given node into a call to the specified helper passing
* the given argument list.
*/
GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeArgList* args)
{
// The helper call ought to be semantically equivalent to the original node, so preserve its VN.
tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);
tree->gtCall.gtCallType = CT_HELPER;
tree->gtCall.gtCallMethHnd = eeFindHelper(helper);
tree->gtCall.gtCallArgs = args;
tree->gtCall.gtCallObjp = nullptr;
tree->gtCall.gtCallLateArgs = nullptr;
tree->gtCall.fgArgInfo = nullptr;
tree->gtCall.gtRetClsHnd = nullptr;
tree->gtCall.gtCallMoreFlags = 0;
tree->gtCall.gtInlineCandidateInfo = nullptr;
tree->gtCall.gtControlExpr = nullptr;
#if DEBUG
// Helper calls are never candidates.
tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
#endif // DEBUG
#ifdef FEATURE_READYTORUN_COMPILER
tree->gtCall.gtEntryPoint.addr = nullptr;
tree->gtCall.gtEntryPoint.accessType = IAT_VALUE;
#endif
#ifndef _TARGET_64BIT_
if (varTypeIsLong(tree))
{
GenTreeCall* callNode = tree->AsCall();
ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
retTypeDesc->Reset();
retTypeDesc->InitializeLongReturnType(this);
callNode->ClearOtherRegs();
}
#endif // !_TARGET_64BIT_
if (tree->OperMayThrow(this))
{
tree->gtFlags |= GTF_EXCEPT;
}
else
{
tree->gtFlags &= ~GTF_EXCEPT;
}
tree->gtFlags |= GTF_CALL;
if (args)
{
tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
}
/* Perform the morphing */
tree = fgMorphArgs(tree->AsCall());
return tree;
}
/*****************************************************************************
*
* Morph a cast node (we perform some very simple transformations here).
*/
#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTree* Compiler::fgMorphCast(GenTree* tree)
{
noway_assert(tree->gtOper == GT_CAST);
noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE);
/* The first sub-operand is the thing being cast */
GenTree* oper = tree->gtCast.CastOp();
if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
{
// Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
// If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
// morphing code to see that type.
fgMorphImplicitByRefArgs(oper);
}
var_types srcType = genActualType(oper->TypeGet());
var_types dstType = tree->CastToType();
unsigned dstSize = genTypeSize(dstType);
// See if the cast has to be done in two steps. R -> I
if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
{
// Only x86 must go through TYP_DOUBLE to get to all
// integral types; everybody else can get straight there,
// except when using helpers.
if (srcType == TYP_FLOAT
#if defined(_TARGET_ARM64_)
// Arm64: src = float, dst is overflow conversion.
// This goes through helper and hence src needs to be converted to double.
&& tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
// Amd64: src = float, dst = uint64 or overflow conversion.
// This goes through helper and hence src needs to be converted to double.
&& (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
// Arm: src = float, dst = int64/uint64 or overflow conversion.
&& (tree->gtOverflow() || varTypeIsLong(dstType))
#endif
)
{
oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
}
// Do we need to do it in two steps: R -> I, then I -> smallType?
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
if (dstSize < genTypeSize(TYP_INT))
{
oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT);
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
tree->gtFlags &= ~GTF_UNSIGNED;
}
#else
if (dstSize < TARGET_POINTER_SIZE)
{
oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL);
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
}
#endif
else
{
/* Note that if we need to use a helper call then we can not morph oper */
if (!tree->gtOverflow())
{
#ifdef _TARGET_ARM64_ // On ARM64, all non-overflow-checking conversions can be optimized
goto OPTIMIZECAST;
#else
switch (dstType)
{
case TYP_INT:
goto OPTIMIZECAST;
case TYP_UINT:
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
goto OPTIMIZECAST;
#else // _TARGET_X86_
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_X86_
case TYP_LONG:
#ifdef _TARGET_AMD64_
// SSE2 has instructions to convert a float/double directly to a long
// TODO-X86-CQ: should this be enabled for x86 also?
goto OPTIMIZECAST;
#else // !_TARGET_AMD64_
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif // !_TARGET_AMD64_
case TYP_ULONG:
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
default:
break;
}
#endif // _TARGET_ARM64_
}
else
{
switch (dstType)
{
case TYP_INT:
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
case TYP_UINT:
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
case TYP_LONG:
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
case TYP_ULONG:
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
default:
break;
}
}
noway_assert(!"Unexpected dstType");
}
}
#ifndef _TARGET_64BIT_
// The code generation phase (for x86 & ARM32) does not handle casts
// directly from [u]long to anything other than [u]int. Insert an
// intermediate cast to native int.
else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
{
oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL);
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
tree->gtFlags &= ~GTF_UNSIGNED;
}
#endif //!_TARGET_64BIT_
#ifdef _TARGET_ARM_
else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
!varTypeIsLong(oper->gtCast.CastOp()))
{
// optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
// except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
// This happens semi-frequently because there is no IL 'conv.r4.un'
oper->gtType = TYP_FLOAT;
oper->CastToType() = TYP_FLOAT;
return fgMorphTree(oper);
}
// converts long/ulong --> float/double casts into helper calls.
else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
{
if (dstType == TYP_FLOAT)
{
// there is only a double helper, so we
// - change the dsttype to double
// - insert a cast from double to float
// - recurse into the resulting tree
tree->CastToType() = TYP_DOUBLE;
tree->gtType = TYP_DOUBLE;
tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
return fgMorphTree(tree);
}
if (tree->gtFlags & GTF_UNSIGNED)
return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
}
#endif //_TARGET_ARM_
#ifdef _TARGET_AMD64_
// Do we have to do two step U4/8 -> R4/8 ?
// Codegen supports the following conversion as one-step operation
// a) Long -> R4/R8
// b) U8 -> R8
//
// The following conversions are performed as two-step operations using above.
// U4 -> R4/8 = U4-> Long -> R4/8
// U8 -> R4 = U8 -> R8 -> R4
else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
{
srcType = genUnsignedType(srcType);
if (srcType == TYP_ULONG)
{
if (dstType == TYP_FLOAT)
{
// Codegen can handle U8 -> R8 conversion.
// U8 -> R4 = U8 -> R8 -> R4
// - change the dsttype to double
// - insert a cast from double to float
// - recurse into the resulting tree
tree->CastToType() = TYP_DOUBLE;
tree->gtType = TYP_DOUBLE;
tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
return fgMorphTree(tree);
}
}
else if (srcType == TYP_UINT)
{
oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
tree->gtFlags &= ~GTF_UNSIGNED;
}
}
#endif // _TARGET_AMD64_
#ifdef _TARGET_X86_
// Do we have to do two step U4/8 -> R4/8 ?
else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
{
srcType = genUnsignedType(srcType);
if (srcType == TYP_ULONG)
{
return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
}
else if (srcType == TYP_UINT)
{
oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
tree->gtFlags &= ~GTF_UNSIGNED;
return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
}
}
else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
{
return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
}
#endif //_TARGET_X86_
else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
{
// We are casting away GC information. We would like to just
// change the type to int, however this gives the emitter fits because
// it believes the variable is a GC variable at the beginning of the
// instruction group, but it is not turned non-GC by the code generator.
// We fix this by copying the GC pointer to a non-GC pointer temp.
noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
// We generate an assignment to an int and then do the cast from an int. With this we avoid
// the gc problem and we allow casts to bytes, longs, etc...
unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
oper->gtType = TYP_I_IMPL;
GenTree* asg = gtNewTempAssign(lclNum, oper);
oper->gtType = srcType;
// do the real cast
GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType);
// Generate the comma tree
oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
return fgMorphTree(oper);
}
// Look for narrowing casts ([u]long -> [u]int) and try to push them
// down into the operand before morphing it.
//
// It doesn't matter if this cast is from ulong or long (i.e. if
// GTF_UNSIGNED is set) because the transformation is only applied to
// overflow-insensitive narrowing casts, which always silently truncate.
//
// Note that casts from [u]long to small integer types are handled above.
if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
{
// As a special case, look for overflow-sensitive casts of an AND
// expression, and see if the second operand is a small constant. Since
// the result of an AND is bound by its smaller operand, it may be
// possible to prove that the cast won't overflow, which will in turn
// allow the cast's operand to be transformed.
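// For example, an overflow-checking cast to int of (x & 0xFF) can never
// overflow, because the result of the AND always fits in 8 bits.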
if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
{
GenTree* andOp2 = oper->gtOp.gtOp2;
// Special case to the special case: AND with a casted int.
if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
{
// gtFoldExprConst will deal with whether the cast is signed or
// unsigned, or overflow-sensitive.
andOp2 = gtFoldExprConst(andOp2);
oper->gtOp.gtOp2 = andOp2;
}
// Look for a constant less than 2^{32} for a cast to uint, or less
// than 2^{31} for a cast to int.
int maxWidth = (dstType == TYP_UINT) ? 32 : 31;
if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
{
// This cast can't overflow.
tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
}
}
// Only apply this transformation during global morph,
// when neither the cast node nor the oper node may throw an exception
// based on the upper 32 bits.
//
if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
{
// For these operations the lower 32 bits of the result only depends
// upon the lower 32 bits of the operands.
//
bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG);
// For long LSH cast to int, there is a discontinuity in behavior
// when the shift amount is 32 or larger.
//
// CAST(INT, LSH(1LL, 31)) == LSH(1, 31)
// LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31)
//
// CAST(INT, LSH(1LL, 32)) == 0
// LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1
//
// So some extra validation is needed.
//
if (oper->OperIs(GT_LSH))
{
GenTree* shiftAmount = oper->gtOp.gtOp2;
// Expose constant value for shift, if possible, to maximize the number
// of cases we can handle.
shiftAmount = gtFoldExpr(shiftAmount);
oper->gtOp.gtOp2 = shiftAmount;
#if DEBUG
// We may remorph the shift amount tree again later, so clear any morphed flag.
shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
if (shiftAmount->IsIntegralConst())
{
const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue();
if (shiftAmountValue >= 64)
{
// Shift amount is large enough that result is undefined.
// Don't try and optimize.
assert(!canPushCast);
}
else if (shiftAmountValue >= 32)
{
// Result of the shift is zero.
DEBUG_DESTROY_NODE(tree);
GenTree* zero = gtNewZeroConNode(TYP_INT);
return fgMorphTree(zero);
}
else if (shiftAmountValue >= 0)
{
// Shift amount is small enough that we can push the cast through.
canPushCast = true;
}
else
{
// Shift amount is negative and so result is undefined.
// Don't try and optimize.
assert(!canPushCast);
}
}
else
{
// Shift amount is unknown. We can't optimize this case.
assert(!canPushCast);
}
}
if (canPushCast)
{
DEBUG_DESTROY_NODE(tree);
// Insert narrowing casts for op1 and op2
oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, false, dstType);
if (oper->gtOp.gtOp2 != nullptr)
{
oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, false, dstType);
}
// Clear the GT_MUL_64RSLT if it is set
if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
{
oper->gtFlags &= ~GTF_MUL_64RSLT;
}
// The operation now produces a 32-bit result.
oper->gtType = TYP_INT;
// Remorph the new tree as the casts that we added may be folded away.
return fgMorphTree(oper);
}
}
}
OPTIMIZECAST:
noway_assert(tree->gtOper == GT_CAST);
/* Morph the operand */
tree->gtCast.CastOp() = oper = fgMorphTree(oper);
/* Reset the call flag */
tree->gtFlags &= ~GTF_CALL;
/* Reset the assignment flag */
tree->gtFlags &= ~GTF_ASG;
/* unless we have an overflow cast, reset the except flag */
if (!tree->gtOverflow())
{
tree->gtFlags &= ~GTF_EXCEPT;
}
/* Just in case new side effects were introduced */
tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);
if (!gtIsActiveCSE_Candidate(tree) && !gtIsActiveCSE_Candidate(oper))
{
srcType = oper->TypeGet();
/* See if we can discard the cast */
if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
{
if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType))
{
if (varTypeIsSmall(srcType))
{
// Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the
// resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType
// must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is
// wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion
// from u4 to i2.
srcType = genActualType(srcType);
}
srcType = genUnsignedType(srcType);
}
if (srcType == dstType)
{ // Certainly if they are identical it is pointless
goto REMOVE_CAST;
}
if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
{
unsigned varNum = oper->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = &lvaTable[varNum];
if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
{
goto REMOVE_CAST;
}
}
bool unsignedSrc = varTypeIsUnsigned(srcType);
bool unsignedDst = varTypeIsUnsigned(dstType);
bool signsDiffer = (unsignedSrc != unsignedDst);
unsigned srcSize = genTypeSize(srcType);
// For same-sized casts, if the signs are the same or the cast
// does not check for overflow, we discard the cast as well
if (srcSize == dstSize)
{
/* This should have been handled above */
noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));
if (!signsDiffer)
{
goto REMOVE_CAST;
}
if (!tree->gtOverflow())
{
/* For small type casts, when necessary we force
the src operand to the dstType and allow the
implied load from memory to perform the casting */
if (varTypeIsSmall(srcType))
{
switch (oper->gtOper)
{
case GT_IND:
case GT_CLS_VAR:
case GT_LCL_FLD:
case GT_ARR_ELEM:
oper->gtType = dstType;
// We're changing the type here so we need to update the VN;
// in other cases we discard the cast without modifying oper
// so the VN doesn't change.
oper->SetVNsFromNode(tree);
goto REMOVE_CAST;
default:
break;
}
}
else
{
goto REMOVE_CAST;
}
}
}
else if (srcSize < dstSize) // widening cast
{
// Keep any long casts
if (dstSize == sizeof(int))
{
// Only keep signed to unsigned widening cast with overflow check
if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
{
goto REMOVE_CAST;
}
}
// Widening casts from unsigned or to signed can never overflow
if (unsignedSrc || !unsignedDst)
{
tree->gtFlags &= ~GTF_OVERFLOW;
if (!(oper->gtFlags & GTF_EXCEPT))
{
tree->gtFlags &= ~GTF_EXCEPT;
}
}
}
else // if (srcSize > dstSize)
{
// Try to narrow the operand of the cast and discard the cast
// Note: Do not narrow a cast that is marked as a CSE
// And do not narrow if the oper is marked as a CSE either
//
if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
{
optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);
/* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
{
oper = oper->gtCast.CastOp();
}
goto REMOVE_CAST;
}
}
}
switch (oper->gtOper)
{
/* If the operand is a constant, we'll fold it */
case GT_CNS_INT:
case GT_CNS_LNG:
case GT_CNS_DBL:
case GT_CNS_STR:
{
GenTree* oldTree = tree;
tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
// Did we get a comma throw as a result of gtFoldExprConst?
if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
{
noway_assert(fgIsCommaThrow(tree));
tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
fgMorphTreeDone(tree);
return tree;
}
else if (tree->gtOper != GT_CAST)
{
return tree;
}
noway_assert(tree->gtCast.CastOp() == oper); // unchanged
}
break;
case GT_CAST:
/* Check for two consecutive casts into the same dstType */
if (!tree->gtOverflow())
{
var_types dstType2 = oper->CastToType();
if (dstType == dstType2)
{
goto REMOVE_CAST;
}
}
break;
case GT_COMMA:
// Check for a cast of a GT_COMMA with an overflow throw.
// Bug 110829: Since this optimization will bash the types,
// neither oper nor commaOp2 can be a CSE candidate
if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
{
GenTree* commaOp2 = oper->gtOp.gtOp2;
if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
{
// need type of oper to be same as tree
if (tree->gtType == TYP_LONG)
{
commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
commaOp2->gtIntConCommon.SetLngValue(0);
/* Change the types of oper and commaOp2 to TYP_LONG */
oper->gtType = commaOp2->gtType = TYP_LONG;
}
else if (varTypeIsFloating(tree->gtType))
{
commaOp2->ChangeOperConst(GT_CNS_DBL);
commaOp2->gtDblCon.gtDconVal = 0.0;
// Change the types of oper and commaOp2
oper->gtType = commaOp2->gtType = tree->gtType;
}
else
{
commaOp2->ChangeOperConst(GT_CNS_INT);
commaOp2->gtIntCon.gtIconVal = 0;
/* Change the types of oper and commaOp2 to TYP_INT */
oper->gtType = commaOp2->gtType = TYP_INT;
}
}
if (vnStore != nullptr)
{
fgValueNumberTreeConst(commaOp2);
}
/* Return the GT_COMMA node as the new tree */
return oper;
}
break;
default:
break;
} /* end switch (oper->gtOper) */
}
if (tree->gtOverflow())
{
fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
}
return tree;
REMOVE_CAST:
/* Here we've eliminated the cast, so just return its operand */
assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate
DEBUG_DESTROY_NODE(tree);
return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif
/*****************************************************************************
*
* Perform an unwrap operation on a Proxy object
*/
GenTree* Compiler::fgUnwrapProxy(GenTree* objRef)
{
assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));
CORINFO_EE_INFO* pInfo = eeGetEEInfo();
GenTree* addTree;
// Perform the unwrap:
//
// This requires two extra indirections.
// We mark these indirections as 'invariant' and
// the CSE logic will hoist them when appropriate.
//
// Note that each dereference is a GC pointer
addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));
objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
objRef->gtFlags |= GTF_IND_INVARIANT;
addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));
objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
objRef->gtFlags |= GTF_IND_INVARIANT;
// objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
return objRef;
}
/*****************************************************************************
*
* Morph an argument list; compute the pointer argument count in the process.
*
* NOTE: This function can be called from any place in the JIT to perform re-morphing
* due to graph altering modifications such as copy / constant propagation
*/
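// UpdateGT_LISTFlags: recursively recompute the GTF_ALL_EFFECT flags for a GT_LIST
// chain from the flags of its operands, and return the updated flags of the list node.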
unsigned UpdateGT_LISTFlags(GenTree* tree)
{
assert(tree->gtOper == GT_LIST);
unsigned flags = 0;
if (tree->gtOp.gtOp2)
{
flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
}
flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
tree->gtFlags &= ~GTF_ALL_EFFECT;
tree->gtFlags |= flags;
return tree->gtFlags;
}
#ifdef DEBUG
void fgArgTabEntry::Dump()
{
printf("fgArgTabEntry[arg %u", argNum);
printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper));
if (regNum != REG_STK)
{
printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s");
for (unsigned i = 0; i < numRegs; i++)
{
printf(" %s", getRegName(regNums[i]));
}
}
if (numSlots > 0)
{
printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
}
printf(", align=%u", alignment);
if (lateArgInx != (unsigned)-1)
{
printf(", lateArgInx=%u", lateArgInx);
}
if (isSplit)
{
printf(", isSplit");
}
if (needTmp)
{
printf(", tmpNum=V%02u", tmpNum);
}
if (needPlace)
{
printf(", needPlace");
}
if (isTmp)
{
printf(", isTmp");
}
if (processed)
{
printf(", processed");
}
if (isHfaRegArg)
{
printf(", isHfa");
}
if (isBackFilled)
{
printf(", isBackFilled");
}
if (isNonStandard)
{
printf(", isNonStandard");
}
if (isStruct)
{
printf(", isStruct");
}
printf("]\n");
}
#endif
fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
compiler = comp;
callTree = call;
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = 0;
#if defined(UNIX_X86_ABI)
alignmentDone = false;
stkSizeBytes = 0;
padStkAlign = 0;
#endif
#if FEATURE_FIXED_OUT_ARGS
outArgSize = 0;
#endif
argTableSize = numArgs; // the allocated table size
hasRegArgs = false;
hasStackArgs = false;
argsComplete = false;
argsSorted = false;
if (argTableSize == 0)
{
argTable = nullptr;
}
else
{
argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
}
}
/*****************************************************************************
*
* fgArgInfo Copy Constructor
*
* This method needs to act like a copy constructor for fgArgInfo.
* The newCall needs to have its fgArgInfo initialized such that
* newCall is an exact copy of oldCall.
* We have to take care since the argument information
* in the argTable contains pointers that must point to the
* new arguments and not the old arguments.
*/
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
fgArgInfo* oldArgInfo = oldCall->gtCall.fgArgInfo;
compiler = oldArgInfo->compiler;
callTree = newCall;
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
alignmentDone = oldArgInfo->alignmentDone;
stkSizeBytes = oldArgInfo->stkSizeBytes;
padStkAlign = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
outArgSize = oldArgInfo->outArgSize;
#endif
argTableSize = oldArgInfo->argTableSize;
argsComplete = false;
argTable = nullptr;
if (argTableSize > 0)
{
argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
for (unsigned inx = 0; inx < argTableSize; inx++)
{
argTable[inx] = nullptr;
}
}
assert(oldArgInfo->argsComplete);
// We create local, artificial GenTreeArgLists that include the gtCallObjp, if it exists, as the first argument,
// so we can iterate over these argument lists more uniformly.
// We need to provide temporary non-null first arguments to these constructors: if we use them, we'll replace them.
GenTreeArgList* newArgs;
GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs);
GenTreeArgList* oldArgs;
GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs);
if (newCall->gtCallObjp == nullptr)
{
assert(oldCall->gtCallObjp == nullptr);
newArgs = newCall->gtCallArgs;
oldArgs = oldCall->gtCallArgs;
}
else
{
assert(oldCall->gtCallObjp != nullptr);
newArgObjp.Current() = newCall->gtCallArgs;
newArgs = &newArgObjp;
oldArgObjp.Current() = oldCall->gtCallObjp;
oldArgs = &oldArgObjp;
}
GenTree* newCurr;
GenTree* oldCurr;
GenTreeArgList* newParent = nullptr;
GenTreeArgList* oldParent = nullptr;
fgArgTabEntry** oldArgTable = oldArgInfo->argTable;
bool scanRegArgs = false;
while (newArgs)
{
/* Get hold of the next argument values for the oldCall and newCall */
newCurr = newArgs->Current();
oldCurr = oldArgs->Current();
if (newArgs != &newArgObjp)
{
newParent = newArgs;
oldParent = oldArgs;
}
else
{
assert(newParent == nullptr && oldParent == nullptr);
}
newArgs = newArgs->Rest();
oldArgs = oldArgs->Rest();
fgArgTabEntry* oldArgTabEntry = nullptr;
fgArgTabEntry* newArgTabEntry = nullptr;
for (unsigned inx = 0; inx < argTableSize; inx++)
{
oldArgTabEntry = oldArgTable[inx];
if (oldArgTabEntry->parent == oldParent)
{
assert((oldParent == nullptr) == (newParent == nullptr));
// We have found the matching "parent" field in oldArgTabEntry
newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
// First block copy all fields
//
*newArgTabEntry = *oldArgTabEntry;
// Then update all GenTree* fields in the newArgTabEntry
//
newArgTabEntry->parent = newParent;
// The node field is likely to have been updated
// to point at a node in the gtCallLateArgs list
//
if (oldArgTabEntry->node == oldCurr)
{
// node is not pointing into the gtCallLateArgs list
newArgTabEntry->node = newCurr;
}
else
{
// node must be pointing into the gtCallLateArgs list
//
// We will fix this pointer up in the next loop
//
newArgTabEntry->node = nullptr; // For now we assign a NULL to this field
scanRegArgs = true;
}
// Now initialize the proper element in the argTable array
//
argTable[inx] = newArgTabEntry;
break;
}
}
// We should have found the matching oldArgTabEntry and created the newArgTabEntry
//
assert(newArgTabEntry != nullptr);
}
if (scanRegArgs)
{
newArgs = newCall->gtCallLateArgs;
oldArgs = oldCall->gtCallLateArgs;
while (newArgs)
{
/* Get hold of the next argument values for the oldCall and newCall */
assert(newArgs->OperIsList());
newCurr = newArgs->Current();
newArgs = newArgs->Rest();
assert(oldArgs->OperIsList());
oldCurr = oldArgs->Current();
oldArgs = oldArgs->Rest();
fgArgTabEntry* oldArgTabEntry = nullptr;
fgArgTabEntry* newArgTabEntry = nullptr;
for (unsigned inx = 0; inx < argTableSize; inx++)
{
oldArgTabEntry = oldArgTable[inx];
if (oldArgTabEntry->node == oldCurr)
{
// We have found the matching "node" field in oldArgTabEntry
newArgTabEntry = argTable[inx];
assert(newArgTabEntry != nullptr);
// update the "node" GenTree* fields in the newArgTabEntry
//
assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field
newArgTabEntry->node = newCurr;
break;
}
}
}
}
argCount = oldArgInfo->argCount;
nextSlotNum = oldArgInfo->nextSlotNum;
hasRegArgs = oldArgInfo->hasRegArgs;
hasStackArgs = oldArgInfo->hasStackArgs;
argsComplete = true;
argsSorted = true;
}
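// AddArg: append the given entry to the argument table; the table must have been
// allocated with enough room (argCount < argTableSize).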
void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry)
{
assert(argCount < argTableSize);
argTable[argCount] = curArgTabEntry;
argCount++;
}
fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum,
GenTree* node,
GenTree* parent,
regNumber regNum,
unsigned numRegs,
unsigned alignment,
bool isStruct,
bool isVararg /*=false*/)
{
fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
// Any additional register numbers are set by the caller.
// This is primarily because on ARM we don't yet know if it
// will be split or if it is a double HFA, so the number of registers
// may actually be less.
curArgTabEntry->setRegNum(0, regNum);
curArgTabEntry->argNum = argNum;
curArgTabEntry->node = node;
curArgTabEntry->parent = parent;
curArgTabEntry->slotNum = 0;
curArgTabEntry->numRegs = numRegs;
curArgTabEntry->numSlots = 0;
curArgTabEntry->alignment = alignment;
curArgTabEntry->lateArgInx = (unsigned)-1;
curArgTabEntry->tmpNum = (unsigned)-1;
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
curArgTabEntry->needPlace = false;
curArgTabEntry->processed = false;
#ifdef FEATURE_HFA
curArgTabEntry->_isHfaRegArg = false;
#endif
curArgTabEntry->isBackFilled = false;
curArgTabEntry->isNonStandard = false;
curArgTabEntry->isStruct = isStruct;
curArgTabEntry->isVararg = isVararg;
hasRegArgs = true;
AddArg(curArgTabEntry);
return curArgTabEntry;
}
#if defined(UNIX_AMD64_ABI)
fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum,
GenTree* node,
GenTree* parent,
regNumber regNum,
unsigned numRegs,
unsigned alignment,
const bool isStruct,
const bool isVararg,
const regNumber otherRegNum,
const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct, isVararg);
assert(curArgTabEntry != nullptr);
curArgTabEntry->isStruct = isStruct; // is this a struct arg
curArgTabEntry->checkIsStruct();
assert(numRegs <= 2);
if (numRegs == 2)
{
curArgTabEntry->setRegNum(1, otherRegNum);
}
if (isStruct && structDescPtr != nullptr)
{
curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
}
return curArgTabEntry;
}
#endif // defined(UNIX_AMD64_ABI)
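// AddStkArg: create and record an fgArgTabEntry for an argument passed on the stack,
// assigning it 'numSlots' outgoing slots starting at the (alignment-rounded) nextSlotNum.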
fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
GenTree* node,
GenTree* parent,
unsigned numSlots,
unsigned alignment,
bool isStruct,
bool isVararg /*=false*/)
{
fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
curArgTabEntry->setRegNum(0, REG_STK);
curArgTabEntry->argNum = argNum;
curArgTabEntry->node = node;
curArgTabEntry->parent = parent;
curArgTabEntry->slotNum = nextSlotNum;
curArgTabEntry->numRegs = 0;
curArgTabEntry->numSlots = numSlots;
curArgTabEntry->alignment = alignment;
curArgTabEntry->lateArgInx = (unsigned)-1;
curArgTabEntry->tmpNum = (unsigned)-1;
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
curArgTabEntry->needPlace = false;
curArgTabEntry->processed = false;
#ifdef FEATURE_HFA
curArgTabEntry->_isHfaRegArg = false;
#endif
curArgTabEntry->isBackFilled = false;
curArgTabEntry->isNonStandard = false;
curArgTabEntry->isStruct = isStruct;
curArgTabEntry->isVararg = isVararg;
hasStackArgs = true;
AddArg(curArgTabEntry);
nextSlotNum += numSlots;
return curArgTabEntry;
}
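// RemorphReset: reset the next outgoing stack slot number so the argument table
// can be processed again when the call is re-morphed.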
void fgArgInfo::RemorphReset()
{
nextSlotNum = INIT_ARG_STACK_SLOT;
}
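// RemorphRegArg: during re-morphing, find the existing table entry for the given
// register argument and, if the argument tree was replaced, update the entry's node
// (locating the new node in the gtCallLateArgs list when necessary).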
fgArgTabEntry* fgArgInfo::RemorphRegArg(
unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
fgArgTabEntry* curArgTabEntry = nullptr;
unsigned regArgInx = 0;
unsigned inx;
for (inx = 0; inx < argCount; inx++)
{
curArgTabEntry = argTable[inx];
if (curArgTabEntry->argNum == argNum)
{
break;
}
bool isRegArg;
GenTree* argx;
if (curArgTabEntry->parent != nullptr)
{
assert(curArgTabEntry->parent->OperIsList());
argx = curArgTabEntry->parent->Current();
isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
}
else
{
argx = curArgTabEntry->node;
isRegArg = true;
}
if (isRegArg)
{
regArgInx++;
}
}
// if this was a nonstandard arg the table is definitive
if (curArgTabEntry->isNonStandard)
{
regNum = curArgTabEntry->regNum;
}
assert(curArgTabEntry->argNum == argNum);
assert(curArgTabEntry->regNum == regNum);
assert(curArgTabEntry->alignment == alignment);
assert(curArgTabEntry->parent == parent);
if (curArgTabEntry->node != node)
{
GenTree* argx = nullptr;
unsigned regIndex = 0;
/* process the register argument list */
for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
{
argx = list->Current();
assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
if (regIndex == regArgInx)
{
break;
}
}
assert(regIndex == regArgInx);
assert(regArgInx == curArgTabEntry->lateArgInx);
if (curArgTabEntry->node != argx)
{
curArgTabEntry->node = argx;
}
}
return curArgTabEntry;
}
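// RemorphStkArg: during re-morphing, find the existing table entry for the given
// stack argument, verify its slot assignment, and update the entry's node if needed.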
void fgArgInfo::RemorphStkArg(unsigned argNum, GenTree* node, GenTree* parent, unsigned numSlots, unsigned alignment)
{
fgArgTabEntry* curArgTabEntry = nullptr;
bool isRegArg = false;
unsigned regArgInx = 0;
GenTree* argx;
unsigned inx;
for (inx = 0; inx < argCount; inx++)
{
curArgTabEntry = argTable[inx];
if (curArgTabEntry->parent != nullptr)
{
assert(curArgTabEntry->parent->OperIsList());
argx = curArgTabEntry->parent->Current();
isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
}
else
{
argx = curArgTabEntry->node;
isRegArg = true;
}
if (curArgTabEntry->argNum == argNum)
{
break;
}
if (isRegArg)
{
regArgInx++;
}
}
nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
assert(curArgTabEntry->argNum == argNum);
assert(curArgTabEntry->slotNum == nextSlotNum);
assert(curArgTabEntry->numSlots == numSlots);
assert(curArgTabEntry->alignment == alignment);
assert(curArgTabEntry->parent == parent);
assert(parent->OperIsList());
#if FEATURE_FIXED_OUT_ARGS
if (curArgTabEntry->node != node)
{
if (isRegArg)
{
GenTree* argx = nullptr;
unsigned regIndex = 0;
/* process the register argument list */
for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
{
argx = list->Current();
assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
if (regIndex == regArgInx)
{
break;
}
}
assert(regIndex == regArgInx);
assert(regArgInx == curArgTabEntry->lateArgInx);
if (curArgTabEntry->node != argx)
{
curArgTabEntry->node = argx;
}
}
else
{
assert(parent->Current() == node);
curArgTabEntry->node = node;
}
}
#else
curArgTabEntry->node = node;
#endif
nextSlotNum += numSlots;
}
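// SplitArg: record that the argument with the given argNum is split, passed partly
// in 'numRegs' registers and partly in 'numSlots' outgoing stack slots.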
void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
{
fgArgTabEntry* curArgTabEntry = nullptr;
assert(argNum < argCount);
for (unsigned inx = 0; inx < argCount; inx++)
{
curArgTabEntry = argTable[inx];
if (curArgTabEntry->argNum == argNum)
{
break;
}
}
assert(numRegs > 0);
assert(numSlots > 0);
if (argsComplete)
{
assert(curArgTabEntry->isSplit == true);
assert(curArgTabEntry->numRegs == numRegs);
assert(curArgTabEntry->numSlots == numSlots);
assert(hasStackArgs == true);
}
else
{
curArgTabEntry->isSplit = true;
curArgTabEntry->numRegs = numRegs;
curArgTabEntry->numSlots = numSlots;
hasStackArgs = true;
}
nextSlotNum += numSlots;
}
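// EvalToTmp: record that the argument with the given argNum has been evaluated into
// the temp lclVar 'tmpNum' and point its table entry at the replacement node.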
void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTree* newNode)
{
fgArgTabEntry* curArgTabEntry = nullptr;
assert(argNum < argCount);
for (unsigned inx = 0; inx < argCount; inx++)
{
curArgTabEntry = argTable[inx];
if (curArgTabEntry->argNum == argNum)
{
break;
}
}
assert(curArgTabEntry->parent->Current() == newNode);
curArgTabEntry->node = newNode;
curArgTabEntry->tmpNum = tmpNum;
curArgTabEntry->isTmp = true;
}
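// ArgsComplete: after all arguments have been added to the table, decide which ones
// must be evaluated into temps (needTmp) or need placeholder nodes (needPlace) so that
// side effects are observed in the correct order, then mark the table as complete.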
void fgArgInfo::ArgsComplete()
{
bool hasStackArgs = false;
bool hasStructRegArg = false;
for (unsigned curInx = 0; curInx < argCount; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];
assert(curArgTabEntry != nullptr);
GenTree* argx = curArgTabEntry->node;
if (curArgTabEntry->regNum == REG_STK)
{
hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
// On x86 we use push instructions to pass arguments:
// The non-register arguments are evaluated and pushed in order
// and they are never evaluated into temps
//
continue;
#endif
}
#if FEATURE_ARG_SPLIT
else if (curArgTabEntry->isSplit)
{
hasStructRegArg = true;
hasStackArgs = true;
}
#endif // FEATURE_ARG_SPLIT
else // we have a register argument, next we look for a struct type.
{
if (varTypeIsStruct(argx) UNIX_AMD64_ABI_ONLY(|| curArgTabEntry->isStruct))
{
hasStructRegArg = true;
}
}
/* If the argument tree contains an assignment (GTF_ASG) then the argument and
every earlier argument (except constants) must be evaluated into temps
since there may be other arguments that follow and they may use the value being assigned.
EXAMPLE: ArgTab is "a, a=5, a"
-> when we see the second arg "a=5"
we know the first two arguments "a, a=5" have to be evaluated into temps
For the case of an assignment, we only know that there exists some assignment someplace
in the tree. We don't know what is being assigned, so we are very conservative here
and assume that any local variable could have been assigned.
*/
if (argx->gtFlags & GTF_ASG)
{
// If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
// a tmp, then we need a temp in the late arg list.
if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
|| curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
// that we only have late non-register args when that feature is on.
#endif // FEATURE_FIXED_OUT_ARGS
)
{
curArgTabEntry->needTmp = true;
}
// For all previous arguments, unless they are a simple constant
// we require that they be evaluated into temps
for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
{
fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
assert(prevArgTabEntry->node);
if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
{
prevArgTabEntry->needTmp = true;
}
}
}
bool treatLikeCall = ((argx->gtFlags & GTF_CALL) != 0);
#if FEATURE_FIXED_OUT_ARGS
// Like calls, if this argument has a tree that will do an inline throw,
// a call to a jit helper, then we need to treat it like a call (but only
// if there are/were any stack args).
// This means unnesting, sorting, etc. Technically this is overly
// conservative, but I want to avoid as much special-case debug-only code
// as possible, so leveraging the GTF_CALL flag is the easiest.
//
if (!treatLikeCall && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) && compiler->opts.compDbgCode &&
(compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
{
for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
{
if (otherInx == curInx)
{
continue;
}
if (argTable[otherInx]->regNum == REG_STK)
{
treatLikeCall = true;
break;
}
}
}
#endif // FEATURE_FIXED_OUT_ARGS
/* If it contains a call (GTF_CALL) then itself and everything before the call
with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
has to be kept in the right order since we will move the call to the first position)
For calls we don't have to be quite as conservative as we are with an assignment
since the call won't be modifying any non-address taken LclVars.
*/
if (treatLikeCall)
{
if (argCount > 1) // If this is not the only argument
{
curArgTabEntry->needTmp = true;
}
else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
{
// Spill all arguments that are floating point calls
curArgTabEntry->needTmp = true;
}
// All previous arguments may need to be evaluated into temps
for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
{
fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
assert(prevArgTabEntry->node);
// For all previous arguments, if they have any GTF_ALL_EFFECT
// we require that they be evaluated into a temp
if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
{
prevArgTabEntry->needTmp = true;
}
#if FEATURE_FIXED_OUT_ARGS
// Or, if they are stored into the FIXED_OUT_ARG area
// we require that they be moved to the gtCallLateArgs
// and replaced with a placeholder node
else if (prevArgTabEntry->regNum == REG_STK)
{
prevArgTabEntry->needPlace = true;
}
#if FEATURE_ARG_SPLIT
else if (prevArgTabEntry->isSplit)
{
prevArgTabEntry->needPlace = true;
}
#endif // FEATURE_ARG_SPLIT
#endif
}
}
#if FEATURE_MULTIREG_ARGS
// For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
// with multiple indirections, so here we consider spilling it into a tmp LclVar.
//
CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_ARM_
bool isMultiRegArg = (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->numSlots > 1);
#else
bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
#endif
if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false))
{
if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
{
// Spill multireg struct arguments that have Assignments or Calls embedded in them
curArgTabEntry->needTmp = true;
}
else
{
// We call gtPrepareCost to measure the cost of evaluating this tree
compiler->gtPrepareCost(argx);
if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
{
// Spill multireg struct arguments that are expensive to evaluate twice
curArgTabEntry->needTmp = true;
}
#if defined(FEATURE_SIMD) && defined(_TARGET_ARM64_)
else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet()))
{
// SIMD types do not need the optimization below due to their sizes
if (argx->OperIs(GT_SIMD) || (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) &&
argx->AsObj()->gtOp1->gtOp.gtOp1->OperIs(GT_SIMD)))
{
curArgTabEntry->needTmp = true;
}
}
#endif
#ifndef _TARGET_ARM_
// TODO-Arm: This optimization is not implemented for ARM32
// so we skip this for ARM32 until it is ported to use RyuJIT backend
//
else if (argx->OperGet() == GT_OBJ)
{
GenTreeObj* argObj = argx->AsObj();
CORINFO_CLASS_HANDLE objClass = argObj->gtClass;
unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass);
switch (structSize)
{
case 3:
case 5:
case 6:
case 7:
// If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
//
if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
{
// If we don't have a LclVar we need to read exactly 3, 5, 6 or 7 bytes
// For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
//
curArgTabEntry->needTmp = true;
}
break;
case 11:
case 13:
case 14:
case 15:
// Spill any GT_OBJ multireg structs that are difficult to extract
//
// When we have a GT_OBJ of a struct with the above sizes we would need
// to use 3 or 4 load instructions to load the exact size of this struct.
// Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
// will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
// Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
// the argument.
//
curArgTabEntry->needTmp = true;
break;
default:
break;
}
}
#endif // !_TARGET_ARM_
}
}
#endif // FEATURE_MULTIREG_ARGS
}
// We only care because we can't spill structs and qmarks involve a lot of spilling, but
// if we don't have qmarks, then it doesn't matter.
// So check for Qmarks globally once here, instead of inside the loop.
//
const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);
#if FEATURE_FIXED_OUT_ARGS
// For Arm/x64 we only care because we can't reorder a register
// argument that uses GT_LCLHEAP. This is an optimization to
// save a check inside the below loop.
//
const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);
#else
const bool hasStackArgsWeCareAbout = hasStackArgs;
#endif // FEATURE_FIXED_OUT_ARGS
// If we have any stack args we have to force the evaluation
// of any arguments passed in registers that might throw an exception
//
// Technically we are only required to handle the following two cases:
// a GT_IND with GTF_IND_RNGCHK (only on x86) or
// a GT_LCLHEAP node that allocates stuff on the stack
//
if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
{
for (unsigned curInx = 0; curInx < argCount; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];
assert(curArgTabEntry != nullptr);
GenTree* argx = curArgTabEntry->node;
// Examine the register args that are currently not marked needTmp
//
if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
{
if (hasStackArgsWeCareAbout)
{
#if !FEATURE_FIXED_OUT_ARGS
// On x86 we previously recorded a stack depth of zero when
// morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
// Thus we can not reorder the argument after any stack based argument
// (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
// check for it explicitly.)
//
if (argx->gtFlags & GTF_EXCEPT)
{
curArgTabEntry->needTmp = true;
continue;
}
#else
// For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
//
if (argx->gtFlags & GTF_EXCEPT)
{
assert(compiler->compLocallocUsed);
// Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
//
if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
{
curArgTabEntry->needTmp = true;
continue;
}
}
#endif
}
if (hasStructRegArgWeCareAbout)
{
// Returns true if a GT_QMARK node is encountered in the argx tree
//
if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
{
curArgTabEntry->needTmp = true;
continue;
}
}
}
}
}
argsComplete = true;
}
void fgArgInfo::SortArgs()
{
assert(argsComplete == true);
#ifdef DEBUG
if (compiler->verbose)
{
printf("\nSorting the arguments:\n");
}
#endif
/* Shuffle the arguments around before we build the gtCallLateArgs list.
The idea is to move all "simple" arguments like constants and local vars
to the end of the table, and move the complex arguments towards the beginning
of the table. This will help prevent registers from being spilled by
allowing us to evaluate the more complex arguments before the simpler arguments.
The argTable ends up looking like:
+------------------------------------+ <--- argTable[argCount - 1]
| constants |
+------------------------------------+
| local var / local field |
+------------------------------------+
| remaining arguments sorted by cost |
+------------------------------------+
| temps (argTable[].needTmp = true) |
+------------------------------------+
| args with calls (GTF_CALL) |
+------------------------------------+ <--- argTable[0]
*/
/* Set the beginning and end for the new argument table */
unsigned curInx;
int regCount = 0;
unsigned begTab = 0;
unsigned endTab = argCount - 1;
unsigned argsRemaining = argCount;
// First take care of arguments that are constants.
// [We use a backward iterator pattern]
//
curInx = argCount;
do
{
curInx--;
fgArgTabEntry* curArgTabEntry = argTable[curInx];
if (curArgTabEntry->regNum != REG_STK)
{
regCount++;
}
// Skip any already processed args
//
if (!curArgTabEntry->processed)
{
GenTree* argx = curArgTabEntry->node;
// put constants at the end of the table
//
if (argx->gtOper == GT_CNS_INT)
{
noway_assert(curInx <= endTab);
curArgTabEntry->processed = true;
// place curArgTabEntry at the endTab position by performing a swap
//
if (curInx != endTab)
{
argTable[curInx] = argTable[endTab];
argTable[endTab] = curArgTabEntry;
}
endTab--;
argsRemaining--;
}
}
} while (curInx > 0);
if (argsRemaining > 0)
{
// Next take care of arguments that are calls.
// [We use a forward iterator pattern]
//
for (curInx = begTab; curInx <= endTab; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];
// Skip any already processed args
//
if (!curArgTabEntry->processed)
{
GenTree* argx = curArgTabEntry->node;
// put calls at the beginning of the table
//
if (argx->gtFlags & GTF_CALL)
{
curArgTabEntry->processed = true;
// place curArgTabEntry at the begTab position by performing a swap
//
if (curInx != begTab)
{
argTable[curInx] = argTable[begTab];
argTable[begTab] = curArgTabEntry;
}
begTab++;
argsRemaining--;
}
}
}
}
if (argsRemaining > 0)
{
// Next take care of arguments that are temps.
// These temps come before the arguments that are
// ordinary local vars or local fields
// since this will give them a better chance to become
// enregistered into their actual argument register.
// [We use a forward iterator pattern]
//
for (curInx = begTab; curInx <= endTab; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];
// Skip any already processed args
//
if (!curArgTabEntry->processed)
{
if (curArgTabEntry->needTmp)
{
curArgTabEntry->processed = true;
// place curArgTabEntry at the begTab position by performing a swap
//
if (curInx != begTab)
{
argTable[curInx] = argTable[begTab];
argTable[begTab] = curArgTabEntry;
}
begTab++;
argsRemaining--;
}
}
}
}
if (argsRemaining > 0)
{
// Next take care of local var and local field arguments.
// These are moved towards the end of the argument evaluation.
// [We use a backward iterator pattern]
//
curInx = endTab + 1;
do
{
curInx--;
fgArgTabEntry* curArgTabEntry = argTable[curInx];
// Skip any already processed args
//
if (!curArgTabEntry->processed)
{
GenTree* argx = curArgTabEntry->node;
if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
{
noway_assert(curInx <= endTab);
curArgTabEntry->processed = true;
// place curArgTabEntry at the endTab position by performing a swap
//
if (curInx != endTab)
{
argTable[curInx] = argTable[endTab];
argTable[endTab] = curArgTabEntry;
}
endTab--;
argsRemaining--;
}
}
} while (curInx > begTab);
}
// Finally, take care of all the remaining arguments.
// Note that we fill in one arg at a time using a while loop.
bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
while (argsRemaining > 0)
{
/* Find the most expensive arg remaining and evaluate it next */
fgArgTabEntry* expensiveArgTabEntry = nullptr;
unsigned expensiveArg = UINT_MAX;
unsigned expensiveArgCost = 0;
// [We use a forward iterator pattern]
//
for (curInx = begTab; curInx <= endTab; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];
// Skip any already processed args
//
if (!curArgTabEntry->processed)
{
GenTree* argx = curArgTabEntry->node;
// We should have already handled these kinds of args
assert(argx->gtOper != GT_LCL_VAR);
assert(argx->gtOper != GT_LCL_FLD);
assert(argx->gtOper != GT_CNS_INT);
// This arg should either have no persistent side effects or be the last one in our table
// assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
if (argsRemaining == 1)
{
// This is the last arg to place
expensiveArg = curInx;
expensiveArgTabEntry = curArgTabEntry;
assert(begTab == endTab);
break;
}
else
{
if (!costsPrepared)
{
/* We call gtPrepareCost to measure the cost of evaluating this tree */
compiler->gtPrepareCost(argx);
}
if (argx->gtCostEx > expensiveArgCost)
{
// Remember this arg as the most expensive one that we have yet seen
expensiveArgCost = argx->gtCostEx;
expensiveArg = curInx;
expensiveArgTabEntry = curArgTabEntry;
}
}
}
}
noway_assert(expensiveArg != UINT_MAX);
// put the most expensive arg towards the beginning of the table
expensiveArgTabEntry->processed = true;
// place expensiveArgTabEntry at the begTab position by performing a swap
//
if (expensiveArg != begTab)
{
argTable[expensiveArg] = argTable[begTab];
argTable[begTab] = expensiveArgTabEntry;
}
begTab++;
argsRemaining--;
costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
}
// The table should now be completely filled and thus begTab should now be adjacent to endTab
// and argsRemaining should be zero
assert(begTab == (endTab + 1));
assert(argsRemaining == 0);
#if !FEATURE_FIXED_OUT_ARGS
// Finally build the regArgList
//
callTree->gtCall.regArgList = NULL;
callTree->gtCall.regArgListCount = regCount;
unsigned regInx = 0;
for (curInx = 0; curInx < argCount; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];
if (curArgTabEntry->regNum != REG_STK)
{
// Encode the argument register in the register mask
//
callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
regInx++;
}
}
#endif // !FEATURE_FIXED_OUT_ARGS
argsSorted = true;
}
#ifdef DEBUG
void fgArgInfo::Dump(Compiler* compiler)
{
for (unsigned curInx = 0; curInx < ArgCount(); curInx++)
{
fgArgTabEntry* curArgEntry = ArgTable()[curInx];
curArgEntry->Dump();
}
}
#endif
//------------------------------------------------------------------------------
// fgMakeTmpArgNode : This function creates a tmp var only if needed.
// We need this to be done in order to enforce ordering
// of the evaluation of arguments.
//
// Arguments:
// curArgTabEntry
//
// Return Value:
// the newly created temp var tree.
GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry)
{
unsigned tmpVarNum = curArgTabEntry->tmpNum;
LclVarDsc* varDsc = &lvaTable[tmpVarNum];
assert(varDsc->lvIsTemp);
var_types type = varDsc->TypeGet();
// Create a copy of the temp to go into the late argument list
GenTree* arg = gtNewLclvNode(tmpVarNum, type);
GenTree* addrNode = nullptr;
if (varTypeIsStruct(type))
{
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_)
// Can this type be passed as a primitive type?
// If so, the following call will return the corresponding primitive type.
// Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type.
bool passedAsPrimitive = false;
if (curArgTabEntry->isSingleRegOrSlot())
{
CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
var_types structBaseType =
getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd, curArgTabEntry->isVararg);
if (structBaseType != TYP_UNKNOWN)
{
passedAsPrimitive = true;
#if defined(UNIX_AMD64_ABI)
// TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry,
// and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take
// a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again.
//
if (genIsValidFloatReg(curArgTabEntry->regNum))
{
if (structBaseType == TYP_INT)
{
structBaseType = TYP_FLOAT;
}
else
{
assert(structBaseType == TYP_LONG);
structBaseType = TYP_DOUBLE;
}
}
#endif
type = structBaseType;
}
}
// If it is passed in registers, don't get the address of the var. Make it a
// field instead. It will be loaded in registers with putarg_reg tree in lower.
if (passedAsPrimitive)
{
arg->ChangeOper(GT_LCL_FLD);
arg->gtType = type;
}
else
{
var_types addrType = TYP_BYREF;
arg = gtNewOperNode(GT_ADDR, addrType, arg);
addrNode = arg;
#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
assert(varTypeIsStruct(type));
if (lvaIsMultiregStruct(varDsc, curArgTabEntry->isVararg))
{
// ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
// as that is how UNIX_AMD64_ABI works.
// We will create a GT_OBJ for the argument below.
// This will be passed by value in two registers.
assert(addrNode != nullptr);
// Create an Obj of the temp to use it as a call argument.
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
// TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
// this is only to preserve former behavior (though some CSE'ing of struct
// values can be pessimizing, so enabling this may require some additional tuning).
arg->gtFlags |= GTF_DONT_CSE;
}
#else
// Always create an Obj of the temp to use it as a call argument.
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
arg->gtFlags |= GTF_DONT_CSE;
#endif // !_TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
}
#else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_)
// On other targets, we pass the struct by value
assert(varTypeIsStruct(type));
addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
// Get a new Obj node temp to use it as a call argument.
// gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_)
} // (varTypeIsStruct(type))
if (addrNode != nullptr)
{
assert(addrNode->gtOper == GT_ADDR);
// This will prevent this LclVar from being optimized away
lvaSetVarAddrExposed(tmpVarNum);
// the child of a GT_ADDR is required to have this flag set
addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
}
return arg;
}
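// As an illustration of what fgMakeTmpArgNode returns (V05 below is a hypothetical temp
// number): when the struct temp can be passed as a primitive, the result is simply
//     GT_LCL_FLD <primitive type> V05
// for a multireg struct (e.g. on ARM64) it is
//     GT_OBJ struct (GT_ADDR byref (GT_LCL_VAR struct V05))
// and on targets that pass the struct by reference only the GT_ADDR is returned; in the
// address-taken cases V05 is marked address-exposed and its GT_LCL_VAR gets GTF_DONT_CSE.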
//------------------------------------------------------------------------------
// EvalArgsToTemps : Create temp assignments and populate the LateArgs list.
void fgArgInfo::EvalArgsToTemps()
{
assert(argsSorted == true);
unsigned regArgInx = 0;
// Now go through the argument table and perform the necessary evaluation into temps
GenTreeArgList* tmpRegArgNext = nullptr;
for (unsigned curInx = 0; curInx < argCount; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];
GenTree* argx = curArgTabEntry->node;
GenTree* setupArg = nullptr;
GenTree* defArg;
#if !FEATURE_FIXED_OUT_ARGS
// Only ever set for FEATURE_FIXED_OUT_ARGS
assert(curArgTabEntry->needPlace == false);
// On x86 and other archs that use push instructions to pass arguments:
// Only the register arguments need to be replaced with placeholder nodes.
// Stacked arguments are evaluated and pushed (or stored into the stack) in order.
//
if (curArgTabEntry->regNum == REG_STK)
continue;
#endif
if (curArgTabEntry->needTmp)
{
if (curArgTabEntry->isTmp == true)
{
// Create a copy of the temp to go into the late argument list
defArg = compiler->fgMakeTmpArgNode(curArgTabEntry);
// mark the original node as a late argument
argx->gtFlags |= GTF_LATE_ARG;
}
else
{
// Create a temp assignment for the argument
// Put the temp in the gtCallLateArgs list
CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
{
printf("Argument with 'side effect'...\n");
compiler->gtDispTree(argx);
}
#endif
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
noway_assert(argx->gtType != TYP_STRUCT);
#endif
unsigned tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
if (argx->gtOper == GT_MKREFANY)
{
// For GT_MKREFANY, typically the actual struct copying does
// not have any side-effects and can be delayed. So instead
// of using a temp for the whole struct, we can just use a temp
// for the operand that has a side-effect.
GenTree* operand;
if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
{
operand = argx->gtOp.gtOp1;
// In the early argument evaluation, place an assignment to the temp
// from the source operand of the mkrefany
setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
// Replace the operand for the mkrefany with the new temp.
argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
}
else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
{
operand = argx->gtOp.gtOp2;
// In the early argument evaluation, place an assignment to the temp
// from the source operand of the mkrefany
setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
// Replace the operand for the mkrefany with the new temp.
argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
}
}
if (setupArg != nullptr)
{
// Now keep the mkrefany for the late argument list
defArg = argx;
// Clear the side-effect flags because now both op1 and op2 have no side-effects
defArg->gtFlags &= ~GTF_ALL_EFFECT;
}
else
{
setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
if (compiler->fgOrder == Compiler::FGOrderLinear)
{
// We'll reference this temporary variable just once
// when we perform the function call after
// setting up this argument.
varDsc->setLvRefCnt(1);
}
var_types lclVarType = genActualType(argx->gtType);
var_types scalarType = TYP_UNKNOWN;
if (setupArg->OperIsCopyBlkOp())
{
setupArg = compiler->fgMorphCopyBlock(setupArg);
#if defined(_TARGET_ARMARCH_)
// This scalar LclVar widening step is only performed for ARM architectures.
//
CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
unsigned structSize = varDsc->lvExactSize;
scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg);
#endif // _TARGET_ARMARCH_
}
// scalarType can be set to a wider type for ARM architectures: (3 => 4) or (5,6,7 => 8)
if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
{
// Create a GT_LCL_FLD using the wider type to go to the late argument list
defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
}
else
{
// Create a copy of the temp to go to the late argument list
defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
}
curArgTabEntry->isTmp = true;
curArgTabEntry->tmpNum = tmpVarNum;
#ifdef _TARGET_ARM_
// Previously we might have thought the local was promoted, and thus the 'COPYBLK'
// might have left holes in the used registers (see
// fgAddSkippedRegsInPromotedStructArg).
// Too bad we're not that smart for these intermediate temps...
if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
{
regNumber argReg = curArgTabEntry->regNum;
regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
{
argReg = genRegArgNext(argReg);
allUsedRegs |= genRegMask(argReg);
}
}
#endif // _TARGET_ARM_
}
/* mark the assignment as a late argument */
setupArg->gtFlags |= GTF_LATE_ARG;
#ifdef DEBUG
if (compiler->verbose)
{
printf("\n Evaluate to a temp:\n");
compiler->gtDispTree(setupArg);
}
#endif
}
}
else // curArgTabEntry->needTmp == false
{
// On x86 -
// Only register args are replaced with placeholder nodes
// and the stack based arguments are evaluated and pushed in order.
//
// On Arm/x64 - When needTmp is false and needPlace is false,
// the non-register arguments are evaluated and stored in order.
// When needPlace is true we have a nested call that comes after
// this argument so we have to replace it in the gtCallArgs list
// (the initial argument evaluation list) with a placeholder.
//
if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
{
continue;
}
/* No temp needed - move the whole node to the gtCallLateArgs list */
/* The argument is deferred and put in the late argument list */
defArg = argx;
// Create a placeholder node to put in its place in gtCallLateArgs.
// For a struct type we also need to record the class handle of the arg.
CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
// All structs are either passed (and retyped) as integral types, OR they
// are passed by reference.
noway_assert(argx->gtType != TYP_STRUCT);
#else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
if (varTypeIsStruct(defArg))
{
// Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
GenTree* defArgTmp = defArg;
// The GT_OBJ may be a child of a GT_COMMA.
while (defArgTmp->gtOper == GT_COMMA)
{
defArgTmp = defArgTmp->gtOp.gtOp2;
}
assert(varTypeIsStruct(defArgTmp));
// We handle two opcodes: GT_MKREFANY and GT_OBJ.
if (defArgTmp->gtOper == GT_MKREFANY)
{
clsHnd = compiler->impGetRefAnyClass();
}
else if (defArgTmp->gtOper == GT_OBJ)
{
clsHnd = defArgTmp->AsObj()->gtClass;
}
else
{
BADCODE("Unhandled struct argument tree in fgMorphArgs");
}
}
#endif // !(defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI))
setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
/* mark the placeholder node as a late argument */
setupArg->gtFlags |= GTF_LATE_ARG;
#ifdef DEBUG
if (compiler->verbose)
{
if (curArgTabEntry->regNum == REG_STK)
{
printf("Deferred stack argument :\n");
}
else
{
printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
}
compiler->gtDispTree(argx);
printf("Replaced with placeholder node:\n");
compiler->gtDispTree(setupArg);
}
#endif
}
if (setupArg != nullptr)
{
if (curArgTabEntry->parent)
{
GenTree* parent = curArgTabEntry->parent;
/* a normal argument from the list */
noway_assert(parent->OperIsList());
noway_assert(parent->gtOp.gtOp1 == argx);
parent->gtFlags |= (setupArg->gtFlags & GTF_ALL_EFFECT);
parent->gtOp.gtOp1 = setupArg;
}
else
{
/* must be the gtCallObjp */
noway_assert(callTree->gtCall.gtCallObjp == argx);
callTree->gtCall.gtCallObjp = setupArg;
}
}
/* deferred arg goes into the late argument list */
if (tmpRegArgNext == nullptr)
{
tmpRegArgNext = compiler->gtNewArgList(defArg);
callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
}
else
{
noway_assert(tmpRegArgNext->OperIsList());
noway_assert(tmpRegArgNext->Current());
tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
tmpRegArgNext = tmpRegArgNext->Rest();
}
tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
curArgTabEntry->node = defArg;
curArgTabEntry->lateArgInx = regArgInx++;
}
#ifdef DEBUG
if (compiler->verbose)
{
printf("\nShuffled argument table: ");
for (unsigned curInx = 0; curInx < argCount; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];
if (curArgTabEntry->regNum != REG_STK)
{
printf("%s ", getRegName(curArgTabEntry->regNum));
}
}
printf("\n");
}
#endif
}
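// As a brief illustration of the effect of EvalArgsToTemps (V07 below is a hypothetical
// temp number): for a register argument 'argx' that needs a temp, the early list entry
// becomes the setup assignment
//     GT_ASG(GT_LCL_VAR V07, argx)      // marked GTF_LATE_ARG
// and gtCallLateArgs receives the inexpensive re-read
//     GT_LCL_VAR V07
// so argx is still evaluated in its original argument order while its final placement
// (register load or store to the outgoing area) is deferred.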
// Get the late arg for arg at position argIndex.
// argIndex - 0-based position to get late arg for.
// Caller must ensure this position has a late arg.
GenTree* fgArgInfo::GetLateArg(unsigned argIndex)
{
for (unsigned j = 0; j < this->ArgCount(); j++)
{
if (this->ArgTable()[j]->argNum == argIndex)
{
return this->ArgTable()[j]->node;
}
}
// Caller must ensure late arg exists.
unreached();
}
void fgArgInfo::RecordStkLevel(unsigned stkLvl)
{
assert(!IsUninitialized(stkLvl));
this->stkLevel = stkLvl;
}
unsigned fgArgInfo::RetrieveStkLevel()
{
assert(!IsUninitialized(stkLevel));
return stkLevel;
}
// Return a conservative estimate of the stack size in bytes.
// It will be used only on the intercepted-for-host code path to copy the arguments.
int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
{
int numArgs = 0;
for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
{
numArgs++;
}
int numStkArgs;
if (numArgs > MAX_REG_ARG)
{
numStkArgs = numArgs - MAX_REG_ARG;
}
else
{
numStkArgs = 0;
}
return numStkArgs * REGSIZE_BYTES;
}
//------------------------------------------------------------------------------
// fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
//                  otherwise insert a comma-form temp.
//
// Arguments:
// ppTree - a pointer to the child node we will be replacing with the comma expression that
// evaluates ppTree to a temp and returns the result
//
// Return Value:
// A fresh GT_LCL_VAR node referencing the temp which has not been used
//
// Assumption:
// The result tree MUST be added to the tree structure since the ref counts are
// already incremented.
GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
{
GenTree* tree = *pOp;
if (tree->IsLocal())
{
auto result = gtClone(tree);
if (lvaLocalVarRefCounted())
{
lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
}
return result;
}
else
{
GenTree* result = fgInsertCommaFormTemp(pOp);
// At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
// Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
// be added by the caller.
if (lvaLocalVarRefCounted())
{
lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
}
return result;
}
}
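// Usage sketch (hypothetical caller): given a GenTree** 'use' pointing at the operand
// to be reused, 'GenTree* copy = fgMakeMultiUse(use);' either clones *use directly when
// it is already a local, or rewrites *use into the comma-form temp described for
// fgInsertCommaFormTemp below; in both cases the returned node must then be linked into
// the IR by the caller, since the ref counts above already account for it.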
//------------------------------------------------------------------------------
// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
// and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
//
// Arguments:
// ppTree - a pointer to the child node we will be replacing with the comma expression that
// evaluates ppTree to a temp and returns the result
//
// structType - value type handle if the temp created is of TYP_STRUCT.
//
// Return Value:
// A fresh GT_LCL_VAR node referencing the temp which has not been used
//
GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
{
GenTree* subTree = *ppTree;
unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
if (varTypeIsStruct(subTree))
{
assert(structType != nullptr);
lvaSetStruct(lclNum, structType, false);
}
// If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
// The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
// setting the type of the lcl var nodes we create.
GenTree* asg = gtNewTempAssign(lclNum, subTree);
GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
*ppTree = comma;
return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
}
//------------------------------------------------------------------------
// fgMorphArgs: Walk and transform (morph) the arguments of a call
//
// Arguments:
// callNode - the call for which we are doing the argument morphing
//
// Return Value:
// Like most morph methods, this method returns the morphed node,
// though in this case there are currently no scenarios where the
// node itself is re-created.
//
// Notes:
// This method is even less idempotent than most morph methods.
// That is, it makes changes that should not be redone. It uses the existence
// of gtCallLateArgs (the late arguments list) to determine if it has
// already done that work.
//
// The first time it is called (i.e. during global morphing), this method
// computes the "late arguments". This is when it determines which arguments
// need to be evaluated to temps prior to the main argument setup, and which
// can be directly evaluated into the argument location. It also creates a
// second argument list (gtCallLateArgs) that does the final placement of the
// arguments, e.g. into registers or onto the stack.
//
// The "non-late arguments", aka the gtCallArgs, are doing the in-order
// evaluation of the arguments that might have side-effects, such as embedded
// assignments, calls or possible throws. In these cases, it and earlier
// arguments must be evaluated to temps.
//
// On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
// if we have any nested calls, we need to defer the copying of the argument
// into the fixed argument area until after the call. If the argument did not
// otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
// replaced in the "early" arg list (gtCallArgs) with a placeholder node.
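//
//    As a rough illustration (the tree shapes and temp numbers below are hypothetical,
//    not produced verbatim by this method): for a call such as foo(a, bar(b), c) on a
//    FEATURE_FIXED_OUT_ARGS target, where 'a' and 'c' are simple register arguments and
//    bar(b) is a nested call, the lists after the first fgMorphArgs look roughly like:
//       gtCallArgs:     PLACEHOLDER, ASG(V10, bar(b)), PLACEHOLDER
//       gtCallLateArgs: a, LCL_VAR V10, c
//    i.e. the side-effecting argument is evaluated to a temp in its original order,
//    while the final placement of all register arguments is deferred to gtCallLateArgs.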
#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
{
GenTree* args;
GenTree* argx;
unsigned flagsSummary = 0;
unsigned genPtrArgCntSav = fgPtrArgCntCur;
unsigned argIndex = 0;
unsigned intArgRegNum = 0;
unsigned fltArgRegNum = 0;
#ifdef _TARGET_ARM_
regMaskTP argSkippedRegMask = RBM_NONE;
regMaskTP fltArgSkippedRegMask = RBM_NONE;
#endif // _TARGET_ARM_
#if defined(_TARGET_X86_)
unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
#else
const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
#endif
unsigned argSlots = 0;
unsigned nonRegPassedStructSlots = 0;
bool reMorphing = call->AreArgsComplete();
bool callHasRetBuffArg = call->HasRetBufArg();
bool callIsVararg = call->IsVarargs();
JITDUMP("%sMorphing args for %d.%s:\n", (reMorphing) ? "Re" : "", call->gtTreeID, GenTree::OpName(call->gtOper));
#ifdef _TARGET_UNIX_
if (callIsVararg)
{
// Currently native varargs is not implemented on non-Windows targets.
//
// Note that some targets like Arm64 Unix should not need much work, as
// the ABI is the same, while other targets may only need small changes,
// such as amd64 Unix, which just expects RAX to pass numFPArguments.
NYI("Morphing Vararg call not yet implemented on non Windows targets.");
}
#endif // _TARGET_UNIX_
// Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
// following the normal calling convention or in the normal argument registers. We either mark existing
// arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
// non-standard arguments into the argument list, below.
class NonStandardArgs
{
struct NonStandardArg
{
regNumber reg; // The register to be assigned to this non-standard argument.
GenTree* node; // The tree node representing this non-standard argument.
// Note that this must be updated if the tree node changes due to morphing!
};
ArrayStack<NonStandardArg> args;
public:
NonStandardArgs(CompAllocator alloc) : args(alloc, 3) // We will have at most 3 non-standard arguments
{
}
//-----------------------------------------------------------------------------
// Add: add a non-standard argument to the table of non-standard arguments
//
// Arguments:
// node - a GenTree node that has a non-standard argument.
// reg - the register to assign to this node.
//
// Return Value:
// None.
//
void Add(GenTree* node, regNumber reg)
{
NonStandardArg nsa = {reg, node};
args.Push(nsa);
}
//-----------------------------------------------------------------------------
// Find: Look for a GenTree* in the set of non-standard args.
//
// Arguments:
// node - a GenTree node to look for
//
// Return Value:
// The index of the non-standard argument (a non-negative, unique, stable number).
// If the node is not a non-standard argument, return -1.
//
int Find(GenTree* node)
{
for (int i = 0; i < args.Height(); i++)
{
if (node == args.Index(i).node)
{
return i;
}
}
return -1;
}
//-----------------------------------------------------------------------------
// FindReg: Look for a GenTree node in the non-standard arguments set. If found,
// set the register to use for the node.
//
// Arguments:
// node - a GenTree node to look for
// pReg - an OUT argument. *pReg is set to the non-standard register to use if
// 'node' is found in the non-standard argument set.
//
// Return Value:
// 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
// register to use.
// 'false' otherwise (in this case, *pReg is unmodified).
//
bool FindReg(GenTree* node, regNumber* pReg)
{
for (int i = 0; i < args.Height(); i++)
{
NonStandardArg& nsa = args.IndexRef(i);
if (node == nsa.node)
{
*pReg = nsa.reg;
return true;
}
}
return false;
}
//-----------------------------------------------------------------------------
// Replace: Replace the non-standard argument node at a given index. This is done when
// the original node was replaced via morphing, but we need to continue to assign a
// particular non-standard arg to it.
//
// Arguments:
// index - the index of the non-standard arg. It must exist.
// node - the new GenTree node.
//
// Return Value:
// None.
//
void Replace(int index, GenTree* node)
{
args.IndexRef(index).node = node;
}
} nonStandardArgs(getAllocator(CMK_ArrayStack));
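// Usage sketch for NonStandardArgs (using the x86 shift-helper case handled below as an
// example): after 'nonStandardArgs.Add(arg1, REG_LNGARG_LO)' and
// 'nonStandardArgs.Add(arg2, REG_LNGARG_HI)', a later 'FindReg(arg1, &reg)' returns true
// with reg == REG_LNGARG_LO, and if morphing replaces arg1 with a new tree, Replace()
// (driven by the index from Find()) keeps the table pointing at the new node.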
// Count of args. On first morph, this is counted before we've filled in the arg table.
// On remorph, we grab it from the arg table.
unsigned numArgs = 0;
// Process the late arguments (which were determined by a previous caller).
// Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
// may need to refer to it.
if (reMorphing)
{
// We need to reMorph the gtCallLateArgs early since that is what triggers
// the expression folding and we need to have the final folded gtCallLateArgs
// available when we call RemorphRegArg so that we correctly update the fgArgInfo
// with the folded tree that represents the final optimized argument nodes.
//
// However if a range-check needs to be generated for any of these late
// arguments we also need to "know" what the stack depth will be when we generate
// code to branch to the throw range check failure block as that is part of the
// GC information contract for that block.
//
// Since the late arguments are evaluated last we have pushed all of the
// other arguments on the stack before we evaluate these late arguments,
// so we record the stack depth on the first morph call when reMorphing
// was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
//
if (call->gtCallLateArgs != nullptr)
{
unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
fgPtrArgCntCur += callStkLevel;
call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
flagsSummary |= call->gtCallLateArgs->gtFlags;
fgPtrArgCntCur -= callStkLevel;
}
assert(call->fgArgInfo != nullptr);
call->fgArgInfo->RemorphReset();
numArgs = call->fgArgInfo->ArgCount();
}
else
{
// First we need to count the args
if (call->gtCallObjp)
{
numArgs++;
}
for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
{
numArgs++;
}
// Insert or mark non-standard args. These are either outside the normal calling convention, or
// arguments registers that don't follow the normal progression of argument registers in the calling
// convention (such as for the ARM64 fixed return buffer argument x8).
//
// *********** NOTE *************
// The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
// in the implementation of fast tail call.
// *********** END NOTE *********
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
// The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
// Set the argument registers correctly here.
if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
{
GenTreeArgList* args = call->gtCallArgs;
GenTree* arg1 = args->Current();
assert(arg1 != nullptr);
nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
}
#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
#if defined(_TARGET_ARM_)
// On ARM only, a non-standard calling convention that reuses the secure delegate invoke mechanism is used,
// though not for secure delegates themselves. It is used for VSD delegate calls where the VSD custom calling
// convention ABI requires passing R4, a callee-saved register, with a special value. Since R4 is a
// callee-saved register, its value needs to be preserved. Thus, the VM uses a secure delegate IL stub, which
// preserves R4 and also sets up R4 correctly for the VSD call. The VM is simply reusing an existing mechanism
// (the secure delegate IL stub) to achieve its goal for delegate VSD calls. See COMDelegate::NeedsWrapperDelegate()
// in the VM for details.
else if (call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV)
{
GenTree* arg = call->gtCallObjp;
if (arg->OperIsLocal())
{
arg = gtClone(arg, true);
}
else
{
GenTree* tmp = fgInsertCommaFormTemp(&arg);
call->gtCallObjp = arg;
call->gtFlags |= GTF_ASG;
arg = tmp;
}
noway_assert(arg != nullptr);
GenTree* newArg = new (this, GT_ADDR)
GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfSecureDelegateIndirectCell);
// Append newArg as the last arg
GenTreeArgList** insertionPoint = &call->gtCallArgs;
for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
{
}
*insertionPoint = gtNewListNode(newArg, nullptr);
numArgs++;
nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg());
}
#endif // defined(_TARGET_ARM_)
#if defined(_TARGET_X86_)
// The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
// hi part to be in EDX. This sets the argument registers up correctly.
else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
call->IsHelperCall(this, CORINFO_HELP_LRSZ))
{
GenTreeArgList* args = call->gtCallArgs;
GenTree* arg1 = args->Current();
assert(arg1 != nullptr);
nonStandardArgs.Add(arg1, REG_LNGARG_LO);
args = args->Rest();
GenTree* arg2 = args->Current();
assert(arg2 != nullptr);
nonStandardArgs.Add(arg2, REG_LNGARG_HI);
}
#else // !_TARGET_X86_
// TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
// If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
// convention for x86/SSE.
// If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
//
if (hasFixedRetBuffReg() && call->HasRetBufArg())
{
args = call->gtCallArgs;
assert(args != nullptr);
assert(args->OperIsList());
argx = call->gtCallArgs->Current();
// We don't increment numArgs here, since we already counted this argument above.
nonStandardArgs.Add(argx, theFixedRetBuffReg());
}
// We are allowed to have a Fixed Return Buffer argument combined
// with any of the remaining non-standard arguments
//
if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
{
assert(!call->gtCallCookie);
// Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
// It will be used only on the intercepted-for-host code path to copy the arguments.
GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
numArgs++;
nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
}
else if (call->IsVirtualStub())
{
if (!call->IsTailCallViaHelper())
{
GenTree* stubAddrArg = fgGetStubAddrArg(call);
// And push the stub address onto the list of arguments
call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
numArgs++;
nonStandardArgs.Add(stubAddrArg, stubAddrArg->gtRegNum);
}
else
{
// If it is a VSD call getting dispatched via tail call helper,
// fgMorphTailCall() would materialize stub addr as an additional
// parameter added to the original arg list and hence no need to
// add as a non-standard arg.
}
}
else
#endif // !_TARGET_X86_
if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
{
assert(!call->IsUnmanaged());
GenTree* arg = call->gtCallCookie;
noway_assert(arg != nullptr);
call->gtCallCookie = nullptr;
#if defined(_TARGET_X86_)
// x86 passes the cookie on the stack as the final argument to the call.
GenTreeArgList** insertionPoint = &call->gtCallArgs;
for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
{
}
*insertionPoint = gtNewListNode(arg, nullptr);
#else // !defined(_TARGET_X86_)
// All other architectures pass the cookie in a register.
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
#endif // defined(_TARGET_X86_)
nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
numArgs++;
// put destination into R10/EAX
arg = gtClone(call->gtCallAddr, true);
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
numArgs++;
nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
// finally change this call to a helper call
call->gtCallType = CT_HELPER;
call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
}
#if defined(FEATURE_READYTORUN_COMPILER) && defined(_TARGET_ARMARCH_)
// For arm, we dispatch code the same way as VSD, using virtualStubParamInfo->GetReg()
// for the indirection cell address, which ZapIndirectHelperThunk expects.
if (call->IsR2RRelativeIndir())
{
assert(call->gtEntryPoint.addr != nullptr);
size_t addrValue = (size_t)call->gtEntryPoint.addr;
GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR);
indirectCellAddress->gtRegNum = REG_R2R_INDIRECT_PARAM;
// Push the stub address onto the list of arguments.
call->gtCallArgs = gtNewListNode(indirectCellAddress, call->gtCallArgs);
numArgs++;
nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->gtRegNum);
}
#endif // FEATURE_READYTORUN_COMPILER && _TARGET_ARMARCH_
// Allocate the fgArgInfo for the call node;
//
call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
}
/* First we morph the argument subtrees ('this' pointer, arguments, etc.).
* During the first call to fgMorphArgs we also record the
* information about late arguments we have in 'fgArgInfo'.
* This information is used later to construct the gtCallLateArgs */
/* Process the 'this' argument value, if present */
argx = call->gtCallObjp;
if (argx)
{
argx = fgMorphTree(argx);
call->gtCallObjp = argx;
flagsSummary |= argx->gtFlags;
assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
assert(argIndex == 0);
/* We must fill in or update the argInfo table */
if (reMorphing)
{
/* this is a register argument - possibly update it in the table */
call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
}
else
{
assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
/* this is a register argument - put it in the table */
call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1, false,
callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(nullptr));
}
// this can't be a struct.
assert(argx->gtType != TYP_STRUCT);
/* Increment the argument register count and argument index */
if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
{
intArgRegNum++;
#ifdef WINDOWS_AMD64_ABI
// Whenever we pass an integer register argument
// we skip the corresponding floating point register argument
fltArgRegNum++;
#endif // WINDOWS_AMD64_ABI
}
else
{
noway_assert(!"the 'this' pointer can not be a floating point type");
}
argIndex++;
argSlots++;
}
#ifdef _TARGET_X86_
// Compute the maximum number of arguments that can be passed in registers.
// For X86 we handle the varargs and unmanaged calling conventions
if (call->gtFlags & GTF_CALL_POP_ARGS)
{
noway_assert(intArgRegNum < MAX_REG_ARG);
// No more register arguments for varargs (CALL_POP_ARGS)
maxRegArgs = intArgRegNum;
// Add in the ret buff arg
if (callHasRetBuffArg)
maxRegArgs++;
}
if (call->IsUnmanaged())
{
noway_assert(intArgRegNum == 0);
if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
{
noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
call->gtCallArgs->gtOp.gtOp1->gtOper ==
GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
maxRegArgs = 1;
}
else
{
maxRegArgs = 0;
}
// Add in the ret buff arg
if (callHasRetBuffArg)
maxRegArgs++;
}
#endif // _TARGET_X86_
/* Morph the user arguments */
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM_)
// The ARM ABI has a concept of back-filling of floating-point argument registers, according
// to the "Procedure Call Standard for the ARM Architecture" document, especially
// section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
// appear in a lower-numbered register than floating point argument N. That is, argument
// register allocation is not strictly increasing. To support this, we need to keep track of unused
// floating-point argument registers that we can back-fill. We only support 4-byte float and
// 8-byte double types, and one to four element HFAs composed of these types. With this, we will
// only back-fill single registers, since there is no way with these types to create
// an alignment hole greater than one register. However, there can be up to 3 back-fill slots
// available (with 16 FP argument registers). Consider this code:
//
// struct HFA { float x, y, z; }; // a three element HFA
// void bar(float a1, // passed in f0
// double a2, // passed in f2/f3; skip f1 for alignment
// HFA a3, // passed in f4/f5/f6
// double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
// HFA a5, // passed in f10/f11/f12
// double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
// // slots
// float a7, // passed in f1 (back-filled)
// float a8, // passed in f7 (back-filled)
// float a9, // passed in f13 (back-filled)
// float a10) // passed on the stack in [OutArg+0]
//
// Note that if we ever support FP types with larger alignment requirements, then there could
// be more than single register back-fills.
//
// Once we assign a floating-point argument to the stack, all subsequent floating-point arguments must also go on the stack.
// See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
// continues only so long as no VFP CPRC has been allocated to a slot on the stack."
// We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
// and prevent any additional floating-point arguments from going in registers.
bool anyFloatStackArgs = false;
#endif // _TARGET_ARM_
#ifdef UNIX_AMD64_ABI
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
#endif // UNIX_AMD64_ABI
// Note that this name is a bit of a misnomer - it indicates that there are struct args
// that occupy more than a single slot that are passed by value (not necessarily in regs).
bool hasMultiregStructArgs = false;
for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
{
GenTree** parentArgx = &args->gtOp.gtOp1;
// Record the index of any nonStandard arg that we may be processing here, as we are
// about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
GenTree* orig_argx = *parentArgx;
int nonStandard_index = nonStandardArgs.Find(orig_argx);
argx = fgMorphTree(*parentArgx);
*parentArgx = argx;
assert(args->OperIsList());
assert(argx == args->Current());
if ((nonStandard_index != -1) && (argx != orig_argx))
{
// We need to update the node field for this nonStandard arg here
// as it was changed by the call to fgMorphTree
nonStandardArgs.Replace(nonStandard_index, argx);
}
/* Change the node to TYP_I_IMPL so we don't report GC info
* NOTE: We deferred this from the importer because of the inliner */
if (argx->IsVarAddr())
{
argx->gtType = TYP_I_IMPL;
}
bool passUsingFloatRegs;
unsigned argAlign = 1;
unsigned size = 0;
CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
bool isRegArg = false;
bool isNonStandard = false;
regNumber nonStdRegNum = REG_NA;
fgArgTabEntry* argEntry = nullptr;
if (reMorphing)
{
argEntry = gtArgEntryByArgNum(call, argIndex);
}
// Setup any HFA information about 'argx'
var_types hfaType = TYP_UNDEF;
bool isHfaArg = false;
unsigned hfaSlots = 0;
#ifdef FEATURE_HFA
if (reMorphing)
{
isHfaArg = argEntry->isHfaRegArg;
hfaType = argEntry->hfaType;
hfaSlots = argEntry->numRegs;
}
else
{
hfaType = GetHfaType(argx);
if (varTypeIsFloating(hfaType))
{
isHfaArg = true;
hfaSlots = GetHfaCount(argx);
}
}
#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
// For vararg methods, make sure isHfaArg is not true.
isHfaArg = callIsVararg ? false : isHfaArg;
#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
if (isHfaArg)
{
// If we have an HFA struct, it's possible we transition from a method that originally
// only had integer types to one that now has FP types. We have to communicate this
// through this flag since LSRA later on will use this flag to determine whether
// or not to track the FP register set.
//
compFloatingPointUsed = true;
}
#endif // FEATURE_HFA
#ifdef _TARGET_ARM_
bool passUsingIntRegs;
if (reMorphing)
{
passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
}
else
{
passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
}
GenTree* curArg = argx;
// If late args have already been computed, use the node in the argument table.
if (argEntry != NULL && argEntry->isTmp)
{
curArg = argEntry->node;
}
if (reMorphing)
{
argAlign = argEntry->alignment;
}
else
{
// We don't use the "size" return value from InferOpSizeAlign().
codeGen->InferOpSizeAlign(curArg, &argAlign);
argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
argAlign /= TARGET_POINTER_SIZE;
}
if (argAlign == 2)
{
if (passUsingFloatRegs)
{
if (fltArgRegNum % 2 == 1)
{
fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
fltArgRegNum++;
}
}
else if (passUsingIntRegs)
{
if (intArgRegNum % 2 == 1)
{
argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
intArgRegNum++;
}
}
if (argSlots % 2 == 1)
{
argSlots++;
}
}
#elif defined(_TARGET_ARM64_)
if (reMorphing)
{
passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
}
else
{
passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
}
#elif defined(_TARGET_AMD64_)
if (reMorphing)
{
passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
}
else
{
passUsingFloatRegs = varTypeIsFloating(argx);
}
#elif defined(_TARGET_X86_)
passUsingFloatRegs = false;
#else
#error Unsupported or unset target architecture
#endif // _TARGET_*
bool isBackFilled = false;
unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
var_types structBaseType = TYP_STRUCT;
unsigned structSize = 0;
bool isStructArg;
if (reMorphing)
{
assert(argEntry != nullptr);
// Struct arguments may be morphed into a node that is not a struct type.
// In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
// was a struct and the struct classification.
isStructArg = argEntry->isStruct;
#if defined(UNIX_AMD64_ABI)
if (isStructArg)
{
structDesc.CopyFrom(argEntry->structDesc);
}
#endif // defined(UNIX_AMD64_ABI)
if (argEntry->IsBackFilled())
{
isRegArg = true;
size = argEntry->numRegs;
nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
assert(size == 1);
isBackFilled = true;
}
else if (argEntry->regNum == REG_STK)
{
isRegArg = false;
assert(argEntry->numRegs == 0);
size = argEntry->numSlots;
}
else
{
isRegArg = true;
assert(argEntry->numRegs > 0);
size = argEntry->numRegs + argEntry->numSlots;
#ifdef _TARGET_ARM_
if (argEntry->isHfaRegArg && (hfaType == TYP_DOUBLE))
{
assert(!argEntry->isSplit);
size <<= 1;
}
#endif // _TARGET_ARM_
}
// This size has now been computed
assert(size != 0);
isNonStandard = argEntry->isNonStandard;
}
else // !reMorphing
{
//
// Figure out the size of the argument. This is either in number of registers, or number of
// TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
// the stack.
//
isStructArg = varTypeIsStruct(argx);
if (argx->IsArgPlaceHolderNode() || (!isStructArg))
{
#if defined(_TARGET_AMD64_)
#ifdef UNIX_AMD64_ABI
if (!isStructArg)
{
size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
}
else
{
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
TARGET_POINTER_SIZE)) /
TARGET_POINTER_SIZE;
eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
if (size > 1)
{
hasMultiregStructArgs = true;
}
}
#else // !UNIX_AMD64_ABI
size = 1; // On AMD64 Windows, all primitives fit in a single (64-bit) 'slot'
#endif // UNIX_AMD64_ABI
#elif defined(_TARGET_ARM64_)
if (isStructArg)
{
if (isHfaArg)
{
size = GetHfaCount(argx);
// HFA structs are passed by value in multiple registers
hasMultiregStructArgs = true;
}
else
{
// Structs are either passed in 1 or 2 (64-bit) slots
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
TARGET_POINTER_SIZE)) /
TARGET_POINTER_SIZE;
if (size == 2)
{
// Structs that are the size of 2 pointers are passed by value in multiple registers,
// if sufficient registers are available.
hasMultiregStructArgs = true;
}
else if (size > 2)
{
size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
// reference (to a copy)
}
}
// Note that there are some additional rules for multireg structs.
// (i.e they cannot be split between registers and the stack)
}
else
{
size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
}
#elif defined(_TARGET_ARM_)
if (isStructArg)
{
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
TARGET_POINTER_SIZE)) /
TARGET_POINTER_SIZE;
if (isHfaArg || size > 1)
{
hasMultiregStructArgs = true;
}
}
else
{
// The typical case.
// long/double type argument(s) will be changed to GT_FIELD_LIST in the Lowering phase.
size = genTypeStSz(argx->gtType);
}
#elif defined(_TARGET_X86_)
size = genTypeStSz(argx->gtType);
#else
#error Unsupported or unset target architecture
#endif // _TARGET_XXX_
}
else // struct type
{
// We handle two opcodes: GT_MKREFANY and GT_OBJ
if (argx->gtOper == GT_MKREFANY)
{
if (varTypeIsStruct(argx))
{
isStructArg = true;
}
#ifdef _TARGET_AMD64_
#if defined(UNIX_AMD64_ABI)
if (varTypeIsStruct(argx))
{
size = info.compCompHnd->getClassSize(impGetRefAnyClass());
unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
size = roundupSize / TARGET_POINTER_SIZE;
eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
}
else
#endif // defined(UNIX_AMD64_ABI)
{
size = 1;
}
#else
size = 2;
#endif
}
else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
{
GenTree* argObj = argx;
GenTree** parentOfArgObj = parentArgx;
assert(args->OperIsList());
assert(argx == args->Current());
/* The GT_OBJ may be a child of a GT_COMMA */
while (argObj->gtOper == GT_COMMA)
{
parentOfArgObj = &argObj->gtOp.gtOp2;
argObj = argObj->gtOp.gtOp2;
}
// TODO-1stClassStructs: An OBJ node should not be required for lclVars.
if (argObj->gtOper != GT_OBJ)
{
BADCODE("illegal argument tree in fgMorphArgs");
}
CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
#ifdef UNIX_AMD64_ABI
eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
#endif // UNIX_AMD64_ABI
unsigned originalSize = info.compCompHnd->getClassSize(objClass);
originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
structSize = originalSize;
structPassingKind howToPassStruct;
structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, originalSize);
bool passStructByRef = false;
unsigned passingSize = originalSize;
#ifndef _TARGET_X86_
// Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size.
// That is the else clause of the if statement below.
// When it can do this is platform-dependent:
// - In general, it can be done for power of 2 structs that fit in a single register.
// - For ARM and ARM64 it must also be a non-HFA struct, or have a single field.
// - This is irrelevant for X86, since structs are always passed by value on the stack.
// Note that 'howToPassStruct' captures all but the power-of-2 requirement.
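// For example (a sketch; the 4-byte size and local number are hypothetical): a GT_OBJ of
// a 4-byte struct over GT_ADDR(GT_LCL_VAR V02) can be retyped as a GT_IND of TYP_INT,
// which the code below then folds back to V02 itself (as a GT_LCL_VAR or GT_LCL_FLD).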
GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
bool canTransformToInd = false;
if (howToPassStruct == SPK_PrimitiveType)
{
if (isPow2(passingSize))
{
canTransformToInd = true;
}
#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
// For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can
// only transform to an indirection in that case if we are loading from a local.
// TODO-CQ: This transformation should be applicable in general, not just for the ARM64
// or UNIX_AMD64_ABI cases where they will be passed in registers.
else
{
canTransformToInd = (lclVar != nullptr);
passingSize = genTypeSize(structBaseType);
}
#endif // _TARGET_ARM64_ || UNIX_AMD64_ABI
}
if (!canTransformToInd)
{
// Normalize 'size' to the number of pointer sized items
// 'size' is the number of register slots that we will use to pass the argument
size = roundupSize / TARGET_POINTER_SIZE;
#if defined(_TARGET_AMD64_)
#ifndef UNIX_AMD64_ABI
// On Windows structs are always copied and passed by reference unless they are
// passed by value in a single register.
size = 1; // This must be copied to a temp and passed by address
passStructByRef = true;
copyBlkClass = objClass;
#else // UNIX_AMD64_ABI
// On Unix, structs are always passed by value.
// We only need a copy if we have one of the following:
// - We have a lclVar that has been promoted and is passed in registers.
// - The sizes don't match.
// - We have a vector intrinsic.
// TODO-Amd64-Unix-CQ: The first and last case could and should be handled without copies.
copyBlkClass = NO_CLASS_HANDLE;
if (structDesc.passedInRegisters)
{
if ((lclVar != nullptr) &&
(lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT))
{
copyBlkClass = objClass;
}
else if (passingSize != structSize)
{
copyBlkClass = objClass;
}
else
{
GenTree* addr = argObj->gtGetOp1();
if (addr->OperIs(GT_ADDR) && addr->gtGetOp1()->OperIs(GT_SIMD, GT_HWIntrinsic))
{
copyBlkClass = objClass;
}
}
}
#endif // UNIX_AMD64_ABI
#elif defined(_TARGET_ARM64_)
if ((size > 2) && !isHfaArg)
{
size = 1; // This must be copied to a temp and passed by address
passStructByRef = true;
copyBlkClass = objClass;
}
else if ((passingSize != structSize) && (lclVar == nullptr))
{
copyBlkClass = objClass;
}
#endif
#ifdef _TARGET_ARM_
// If we're passing a promoted struct local var,
// we may need to skip some registers due to alignment; record those.
if (lclVar != nullptr)
{
LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
if (varDsc->lvPromoted)
{
assert(argObj->OperGet() == GT_OBJ);
if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
{
fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
}
copyBlkClass = objClass;
}
}
if (structSize < TARGET_POINTER_SIZE)
{
copyBlkClass = objClass;
}
#endif // _TARGET_ARM_
}
else // We have a struct argument with size 1, 2, 4 or 8 bytes
{
// change our GT_OBJ into a GT_IND of the correct type.
// We've already ensured above that size is a power of 2, and less than or equal to pointer
// size.
assert(howToPassStruct == SPK_PrimitiveType);
noway_assert(structBaseType != TYP_UNKNOWN);
assert(passingSize == genTypeSize(structBaseType));
argObj->ChangeOper(GT_IND);
// Now see if we can fold *(&X) into X
if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
{
GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
// Keep the DONT_CSE flag in sync
// (as the addr always marks it for its op1)
temp->gtFlags &= ~GTF_DONT_CSE;
temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
DEBUG_DESTROY_NODE(argObj); // GT_IND
argObj = temp;
*parentOfArgObj = temp;
// If the OBJ had been the top level node, we've now changed argx.
if (parentOfArgObj == parentArgx)
{
argx = temp;
}
}
if (argObj->gtOper == GT_LCL_VAR)
{
unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = &lvaTable[lclNum];
if (varDsc->lvPromoted)
{
if (varDsc->lvFieldCnt == 1)
{
// get the first and only promoted field
LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
if (genTypeSize(fieldVarDsc->TypeGet()) >= passingSize)
{
// we will use the first and only promoted field
argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
(genTypeSize(fieldVarDsc->TypeGet()) == passingSize))
{
// Just use the existing field's type
argObj->gtType = fieldVarDsc->TypeGet();
}
else
{
// Can't use the existing field's type, so use GT_LCL_FLD to swizzle
// to a new type
argObj->ChangeOper(GT_LCL_FLD);
argObj->gtType = structBaseType;
}
assert(varTypeCanReg(argObj->TypeGet()));
assert(copyBlkClass == NO_CLASS_HANDLE);
}
else
{
// use GT_LCL_FLD to swizzle the single field struct to a new type
lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
argObj->ChangeOper(GT_LCL_FLD);
argObj->gtType = structBaseType;
}
}
else
{
// The struct fits into a single register, but it has been promoted into its
// constituent fields, and so we have to re-assemble it
copyBlkClass = objClass;
#ifdef _TARGET_ARM_
// Alignment constraints may cause us not to use (to "skip") some argument
// registers. Add those, if any, to the skipped (int) arg reg mask.
fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
#endif // _TARGET_ARM_
}
}
else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
{
// Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
argObj->ChangeOper(GT_LCL_FLD);
argObj->gtType = structBaseType;
}
}
else
{
// Not a GT_LCL_VAR, so we can just change the type on the node
argO