diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index fd2cfbaf665b..166bd319bd98 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -4821,7 +4821,15 @@ void CodeGen::genCheckUseBlockInit() we waste all the other slots. Really need to compute the correct and compare that against zeroing the slots individually */ - genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4)); +#if defined(UNIX_AMD64_ABI) + // For AMD64_UNIX don't use block initialization if there is no FrameRegister. + // The RDI and RSI registers are used in the block initialization and are also + // the first two parameters to a callee. + // They have to be pushed and popped in the prolog, and without a FrameRegister that breaks unwinding. + genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4)) && isFramePointerUsed(); +#else // UNIX_AMD64_ABI + genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4)); +#endif // UNIX_AMD64_ABI if (genUseBlockInit) { @@ -6072,6 +6080,12 @@ void CodeGen::genZeroInitFrame(int untrLclHi, if (genUseBlockInit) { +#ifdef UNIX_AMD64_ABI + // Should not get here on Unix AMD64 if no frame pointer is used. + // No block initialization in this case. + assert(isFramePointerUsed()); +#endif // UNIX_AMD64_ABI + assert(untrLclHi > untrLclLo); #ifdef _TARGET_ARMARCH_ /* @@ -6266,21 +6280,12 @@ void CodeGen::genZeroInitFrame(int untrLclHi, } noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0); - int disp = untrLclLo; -#ifdef UNIX_AMD64_ABI - // If there is no frame register the pushes above mess up the - // RSP, so adjust by adding 0x10 to the offset.
- if (!isFramePointerUsed()) - { - disp += 0x10; - } -#endif // UNIX_AMD64_ABI getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), - disp); + untrLclLo); regTracker.rsTrackRegTrash(REG_EDI); inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE); @@ -6294,8 +6299,8 @@ void CodeGen::genZeroInitFrame(int untrLclHi, #ifdef UNIX_AMD64_ABI // Restore the RDI and RSI. - inst_RV(INS_pop, REG_RSI, TYP_I_IMPL); - inst_RV(INS_pop, REG_RDI, TYP_I_IMPL); + inst_RV(INS_pop, REG_RSI, TYP_I_IMPL); + inst_RV(INS_pop, REG_RDI, TYP_I_IMPL); #endif // UNIX_AMD64_ABI #else // _TARGET_* #error Unsupported or unset target architecture diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index ec334a76a672..60e0ebe33862 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -4040,10 +4040,15 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize #if defined(_TARGET_X86_) argOffs += sizeof(void *); #elif defined(_TARGET_AMD64_) -#ifndef UNIX_AMD64_ABI - varDsc->lvStkOffs = argOffs; - argOffs += sizeof(void *); -#endif // !UNIX_AMD64_ABI +#ifdef UNIX_AMD64_ABI + // Reserve space on the stack only for OnFrame variables. + // No need to do that for OutgoingArg. No such thing on Linux. + if (varDsc->lvOnFrame) +#endif // UNIX_AMD64_ABI + { + varDsc->lvStkOffs = argOffs; + argOffs += sizeof(void *); + } #elif defined(_TARGET_ARM64_) // Register arguments don't take stack space. #elif defined(_TARGET_ARM_) diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 0bb366eb2109..778c64430650 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -1489,6 +1489,37 @@ LinearScan::doLinearScan() compiler->codeGen->regSet.rsClearRegsModified(); +#ifdef UNIX_AMD64_ABI + // Count the number of MustInit local vars. + // Set FramePointerRequired if there are MAX_VARS_FOR_NO_FRAMEPOINTER or more MustInit vars. + // This way block initialization can be used.
On Linux stosd requires RDI and RSI, + // which are the first 2 parameters to a callee. They need to be preserved on the stack. PUSH/POP + // without FrameRegister breaks unwinding. + // If MAX_VARS_FOR_NO_FRAMEPOINTER or more vars must be initialized, the code for initializing the vars gets + // big and an instruction group is not enough - + // (in emit.cpp there is an assert(emitCurIG != emitPrologIG);) multi-IG prologs are not allowed. + // So, set the frame to have a frame pointer and use block initialization. + unsigned lclNum; + unsigned lclMustInitCnt = 0; + LclVarDsc *varDsc; + + for (lclNum = 0, varDsc = compiler->lvaTable; + lclNum < compiler->lvaCount; + lclNum++, varDsc++) + { + if (varDsc->lvMustInit) + { + lclMustInitCnt++; + } + } + + if (lclMustInitCnt >= MAX_VARS_FOR_NO_FRAMEPOINTER) + { + compiler->codeGen->setFramePointerRequired(true); + } + +#endif // UNIX_AMD64_ABI + // Figure out if we're going to use an RSP frame or an RBP frame. We need to do this // before building the intervals and ref positions, because those objects will embed // RBP in various register masks (like preferences) if RBP is allowed to be allocated. diff --git a/src/jit/target.h b/src/jit/target.h index 7a0bb572026e..e730481a3154 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -698,6 +698,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_FLT_CALLEE_SAVED (0) #define RBM_FLT_CALLEE_TRASH (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5|RBM_XMM6|RBM_XMM7| \ RBM_XMM8|RBM_XMM9|RBM_XMM10|RBM_XMM11|RBM_XMM12|RBM_XMM13|RBM_XMM14|RBM_XMM15) + // The number of MustInit vars on Linux at or above which a FramePointer is required. + // If there is no FramePointer, blockInit in codegencommon.cpp is not used. There is a limit on the size of the + // prolog (it should not exceed one IG), so make sure we don't get into that case. + #define MAX_VARS_FOR_NO_FRAMEPOINTER 6 #endif // UNIX_AMD64_ABI #define REG_FLT_CALLEE_SAVED_FIRST REG_XMM6