From 52d34197d2e905efea903e8218fed822023eae3f Mon Sep 17 00:00:00 2001 From: Sergey Date: Sat, 3 Jul 2021 02:57:01 -0700 Subject: [PATCH 1/4] enable for arm32. fix arm32 Fix arm/arm64. now we can have contained lclRead for other platforms, not only xarch. --- src/coreclr/jit/codegen.h | 2 - src/coreclr/jit/codegenarm.cpp | 16 +++++--- src/coreclr/jit/codegenarmarch.cpp | 52 -------------------------- src/coreclr/jit/codegencommon.cpp | 59 ++++++++++++++++++++++++++++++ src/coreclr/jit/codegenlinear.cpp | 2 +- src/coreclr/jit/codegenxarch.cpp | 58 ----------------------------- src/coreclr/jit/jitconfigvalues.h | 2 +- src/coreclr/jit/lclvars.cpp | 21 +++++++++++ src/coreclr/jit/lowerarmarch.cpp | 6 +++ 9 files changed, 99 insertions(+), 119 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 626cb3e5b7bd6..65cf32b02020d 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -866,10 +866,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Generate code for a GT_BITCAST that is not contained. void genCodeForBitCast(GenTreeOp* treeNode); -#if defined(TARGET_XARCH) // Generate the instruction to move a value between register files void genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg); -#endif // TARGET_XARCH struct GenIntCastDesc { diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 5993f7015ec2b..49fc8c9c3038e 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -1044,15 +1044,21 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) // void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) { - GenTree* data = tree->gtOp1; - + GenTree* data = tree->gtOp1; + GenTree* actualData = data->gtSkipReloadOrCopy(); + unsigned regCount = 1; // var = call, where call returns a multi-reg return value // case is handled separately. - if (data->gtSkipReloadOrCopy()->IsMultiRegNode()) + if (actualData->IsMultiRegNode()) { - genMultiRegStoreToLocal(tree); + regCount = actualData->IsMultiRegLclVar() ? actualData->AsLclVar()->GetFieldCount(compiler) + : actualData->GetMultiRegCount(); + if (regCount > 1) + { + genMultiRegStoreToLocal(tree); + } } - else + if (regCount == 1) { unsigned varNum = tree->GetLclNum(); assert(varNum < compiler->lvaCount); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 71f775d70d872..292df210aab22 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1125,58 +1125,6 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) genProduceReg(tree); } -//---------------------------------------------------------------------- -// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained -// -// Arguments -// treeNode - the GT_BITCAST for which we're generating code -// -void CodeGen::genCodeForBitCast(GenTreeOp* treeNode) -{ - regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); - GenTree* op1 = treeNode->gtGetOp1(); - genConsumeRegs(op1); - if (op1->isContained()) - { - assert(op1->IsLocal() || op1->isIndir()); - op1->gtType = treeNode->TypeGet(); - op1->SetRegNum(targetReg); - op1->ClearContained(); - JITDUMP("Changing type of BITCAST source to load directly."); - genCodeForTreeNode(op1); - } - else if (varTypeUsesFloatReg(treeNode) != varTypeUsesFloatReg(op1)) - { - regNumber srcReg = op1->GetRegNum(); - assert(genTypeSize(op1->TypeGet()) == genTypeSize(targetType)); -#ifdef TARGET_ARM - if (genTypeSize(targetType) == 8) - { - // Converting between long and double on ARM is a special case. - if (targetType == TYP_LONG) - { - regNumber otherReg = treeNode->AsMultiRegOp()->gtOtherReg; - assert(otherReg != REG_NA); - inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, srcReg, EA_8BYTE); - } - else - { - NYI_ARM("Converting from long to double"); - } - } - else -#endif // TARGET_ARM - { - inst_Mov(targetType, targetReg, srcReg, /* canSkip */ false); - } - } - else - { - inst_Mov(targetType, targetReg, genConsumeReg(op1), /* canSkip */ false); - } -} - #if FEATURE_ARG_SPLIT //--------------------------------------------------------------------- // genPutArgSplit - generate code for a GT_PUTARG_SPLIT node diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index d2e08159ec104..be9d7f295a958 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -11329,6 +11329,7 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode) assert(op1->IsMultiRegNode()); unsigned regCount = actualOp1->IsMultiRegLclVar() ? actualOp1->AsLclVar()->GetFieldCount(compiler) : actualOp1->GetMultiRegCount(); + assert(regCount > 1); // Assumption: current implementation requires that a multi-reg // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from @@ -12580,3 +12581,61 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn) } } } + +//---------------------------------------------------------------------- +// genBitCast - Generate the instruction to move a value between register files +// +// Arguments +// targetType - the destination type +// targetReg - the destination register +// srcType - the source type +// srcReg - the source register +// +void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg) +{ + const bool srcFltReg = varTypeUsesFloatReg(srcType) || varTypeIsSIMD(srcType); + assert(srcFltReg == genIsValidFloatReg(srcReg)); + + const bool dstFltReg = varTypeUsesFloatReg(targetType) || varTypeIsSIMD(targetType); + assert(dstFltReg == genIsValidFloatReg(targetReg)); + + inst_Mov(targetType, targetReg, srcReg, /* canSkip */ true); +} + +//---------------------------------------------------------------------- +// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained +// +// Arguments +// treeNode - the GT_BITCAST for which we're generating code +// +void CodeGen::genCodeForBitCast(GenTreeOp* treeNode) +{ + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + GenTree* op1 = treeNode->gtGetOp1(); + genConsumeRegs(op1); + + if (op1->isContained()) + { + assert(op1->IsLocal() || op1->isIndir()); + if (genIsRegCandidateLocal(op1)) + { + unsigned lclNum = op1->AsLclVar()->GetLclNum(); + GetEmitter()->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lclNum)), + emitTypeSize(treeNode), targetReg, lclNum, 0); + } + else + { + op1->gtType = treeNode->TypeGet(); + op1->SetRegNum(targetReg); + op1->ClearContained(); + JITDUMP("Changing type of BITCAST source to load directly.\n"); + genCodeForTreeNode(op1); + } + } + else + { + genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum()); + } + genProduceReg(treeNode); +} diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index aa2fb0f58955f..f58a8db0997e3 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1577,7 +1577,6 @@ void CodeGen::genConsumeRegs(GenTree* tree) { genConsumeAddress(tree); } -#ifdef TARGET_XARCH else if (tree->OperIsLocalRead()) { // A contained lcl var must be living on stack and marked as reg optional, or not be a @@ -1591,6 +1590,7 @@ void CodeGen::genConsumeRegs(GenTree* tree) // Update the life of the lcl var. genUpdateLife(tree); } +#ifdef TARGET_XARCH #ifdef FEATURE_HW_INTRINSICS else if (tree->OperIs(GT_HWINTRINSIC)) { diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 2bd0142381f62..6f605e5514bdc 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7073,64 +7073,6 @@ void CodeGen::genIntrinsic(GenTree* treeNode) genProduceReg(treeNode); } -//---------------------------------------------------------------------- -// genBitCast - Generate the instruction to move a value between register files -// -// Arguments -// targetType - the destination type -// targetReg - the destination register -// srcType - the source type -// srcReg - the source register -// -void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg) -{ - const bool srcFltReg = varTypeUsesFloatReg(srcType) || varTypeIsSIMD(srcType); - assert(srcFltReg == genIsValidFloatReg(srcReg)); - - const bool dstFltReg = varTypeUsesFloatReg(targetType) || varTypeIsSIMD(targetType); - assert(dstFltReg == genIsValidFloatReg(targetReg)); - - inst_Mov(targetType, targetReg, srcReg, /* canSkip */ true); -} - -//---------------------------------------------------------------------- -// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained -// -// Arguments -// treeNode - the GT_BITCAST for which we're generating code -// -void CodeGen::genCodeForBitCast(GenTreeOp* treeNode) -{ - regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); - GenTree* op1 = treeNode->gtGetOp1(); - genConsumeRegs(op1); - - if (op1->isContained()) - { - assert(op1->IsLocal() || op1->isIndir()); - if (genIsRegCandidateLocal(op1)) - { - unsigned lclNum = op1->AsLclVar()->GetLclNum(); - GetEmitter()->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lclNum)), - emitTypeSize(treeNode), targetReg, lclNum, 0); - } - else - { - op1->gtType = treeNode->TypeGet(); - op1->SetRegNum(targetReg); - op1->ClearContained(); - JITDUMP("Changing type of BITCAST source to load directly."); - genCodeForTreeNode(op1); - } - } - else - { - genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum()); - } - genProduceReg(treeNode); -} - //-------------------------------------------------------------------------- // // getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg. // diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 3220193c662a1..67b79448c55e6 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -555,7 +555,7 @@ CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSave #endif // defined(TARGET_ARM64) #endif // DEBUG -#if defined(TARGET_WINDOWS) && defined(TARGET_XARCH) +#if (defined(TARGET_WINDOWS) && defined(TARGET_XARCH)) || defined(TARGET_ARM) CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 1) // Allow to enregister locals with struct type. #else CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 0) // Don't allow to enregister locals with struct type diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 75bcbfafbf290..a267361f9143e 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -3501,6 +3501,27 @@ void Compiler::lvaSortByRefCount() { lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStruct)); } + else if (varDsc->lvType == TYP_STRUCT) + { + if (!varDsc->lvRegStruct && !compEnregStructLocals()) + { + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStruct)); + } + else if (varDsc->lvIsMultiRegArgOrRet()) + { + // Prolog and return generators do not support SIMD<->general register moves. + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStructArg)); + } +#if defined(TARGET_ARM) + else if (varDsc->lvIsParam) + { + // On arm we prespill all struct args, + // TODO-Arm-CQ: keep them in registers, it will need a fix + // to "On the ARM we will spill any incoming struct args" logic in codegencommon. + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStructArg)); + } +#endif // TARGET_ARM + } } if (varDsc->lvIsStructField && (lvaGetParentPromotionType(lclNum) != PROMOTION_TYPE_INDEPENDENT)) { diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 134b77281f681..13af2eaa280f7 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -302,6 +302,12 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) // address, not knowing that GT_IND is part of a block op that has containment restrictions. src->AsIndir()->Addr()->ClearContained(); } + else if (src->OperIs(GT_LCL_VAR)) + { + // TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register. + const unsigned srcLclNum = src->AsLclVar()->GetLclNum(); + comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(Compiler::DNER_BlockOp)); + } if (blkNode->OperIs(GT_STORE_OBJ)) { From 3077ccd7b481d3a61e6eb76357965d3d44d7af11 Mon Sep 17 00:00:00 2001 From: Sergey Date: Thu, 8 Jul 2021 23:41:40 -0700 Subject: [PATCH 2/4] enable x64 unix. --- src/coreclr/jit/jitconfigvalues.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 67b79448c55e6..b6542658b8657 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -555,7 +555,7 @@ CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSave #endif // defined(TARGET_ARM64) #endif // DEBUG -#if (defined(TARGET_WINDOWS) && defined(TARGET_XARCH)) || defined(TARGET_ARM) +#if defined(TARGET_XARCH) || defined(TARGET_ARM) CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 1) // Allow to enregister locals with struct type. #else CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 0) // Don't allow to enregister locals with struct type From 40786c5782b2073f1610f135920a1f1dddedfdf0 Mon Sep 17 00:00:00 2001 From: Sergey Date: Fri, 9 Jul 2021 09:34:03 -0700 Subject: [PATCH 3/4] Fix and enable arm64. --- src/coreclr/jit/codegenarm64.cpp | 21 ++++++++++-------- src/coreclr/jit/codegencommon.cpp | 26 +++++++++++++--------- src/coreclr/jit/compiler.h | 2 ++ src/coreclr/jit/jitconfigvalues.h | 5 ----- src/coreclr/jit/lclvars.cpp | 16 +++++++++---- src/coreclr/jit/lsra.cpp | 6 ++--- src/coreclr/jit/lsrabuild.cpp | 37 +++++++++++++++++++++++++++++-- 7 files changed, 80 insertions(+), 33 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 694cb55f4d2bc..b0dd9ad60a8ac 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -4378,9 +4378,11 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) { assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave); - GenTree* op1 = simdNode->gtGetOp1(); - assert(op1->IsLocal()); - assert(emitTypeSize(op1->TypeGet()) == 16); + GenTree* op1 = simdNode->gtGetOp1(); + GenTreeLclVar* lclNode = op1->AsLclVar(); + LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); + assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16); + regNumber targetReg = simdNode->GetRegNum(); regNumber op1Reg = genConsumeReg(op1); assert(op1Reg != REG_NA); @@ -4391,8 +4393,7 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) { // This is not a normal spill; we'll spill it to the lclVar location. // The localVar must have a stack home. - unsigned varNum = op1->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = compiler->lvaGetDesc(varNum); + unsigned varNum = lclNode->GetLclNum(); assert(varDsc->lvOnFrame); // We want to store this to the upper 8 bytes of this localVar's home. int offset = 8; @@ -4429,16 +4430,18 @@ void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) GenTree* op1 = simdNode->gtGetOp1(); assert(op1->IsLocal()); - assert(emitTypeSize(op1->TypeGet()) == 16); + GenTreeLclVar* lclNode = op1->AsLclVar(); + LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); + assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16); + regNumber srcReg = simdNode->GetRegNum(); - regNumber lclVarReg = genConsumeReg(op1); - unsigned varNum = op1->AsLclVarCommon()->GetLclNum(); + regNumber lclVarReg = genConsumeReg(lclNode); + unsigned varNum = lclNode->GetLclNum(); assert(lclVarReg != REG_NA); assert(srcReg != REG_NA); if (simdNode->gtFlags & GTF_SPILLED) { // The localVar must have a stack home. - LclVarDsc* varDsc = compiler->lvaGetDesc(varNum); assert(varDsc->lvOnFrame); // We will load this from the upper 8 bytes of this localVar's home. int offset = 8; diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index be9d7f295a958..73e6f196cae11 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3783,7 +3783,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount); - varDsc = compiler->lvaTable + varNum; + varDsc = compiler->lvaTable + varNum; + const var_types varRegType = varDsc->GetRegisterType(); noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); /* cannot possibly have stack arguments */ @@ -3827,7 +3828,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere assert(argNum > 0); assert(regArgTab[argNum - 1].slot == 1); assert(regArgTab[argNum - 1].varNum == varNum); - assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16)); + assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16)); regArgMaskLive &= ~genRegMask(regNum); regArgTab[argNum].circular = false; change = true; @@ -4338,9 +4339,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount); - varDsc = compiler->lvaTable + varNum; - var_types regType = regArgTab[argNum].getRegType(compiler); - regNumber regNum = genMapRegArgNumToRegNum(argNum, regType); + varDsc = compiler->lvaTable + varNum; + const var_types regType = regArgTab[argNum].getRegType(compiler); + const regNumber regNum = genMapRegArgNumToRegNum(argNum, regType); + const var_types varRegType = varDsc->GetRegisterType(); #if defined(UNIX_AMD64_ABI) if (regType == TYP_UNDEF) @@ -4439,7 +4441,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere assert(regArgTab[argNum].slot == 2); assert(argNum > 0); assert(regArgTab[argNum - 1].slot == 1); - assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16)); + assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16)); destRegNum = varDsc->GetRegNum(); noway_assert(regNum != destRegNum); continue; @@ -4509,7 +4511,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(regArgTab[nextArgNum].varNum == varNum); // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg // and moves the 0th element of the src reg into the 1st element of the dest reg. - GetEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0); + GetEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varRegType), destRegNum, nextRegNum, 0); // Set destRegNum to regNum so that we skip the setting of the register below, // but mark argNum as processed and clear regNum from the live mask. destRegNum = regNum; @@ -11245,11 +11247,15 @@ void CodeGen::genStructReturn(GenTree* treeNode) assert(regCount <= MAX_RET_REG_COUNT); #if FEATURE_MULTIREG_RET + // Right now the only enregisterable structs supported are SIMD vector types. if (genIsRegCandidateLocal(actualOp1)) { - // Right now the only enregisterable structs supported are SIMD vector types. - assert(varTypeIsSIMD(op1)); - assert(!actualOp1->AsLclVar()->IsMultiReg()); +#if defined(DEBUG) + const GenTreeLclVar* lclVar = actualOp1->AsLclVar(); + const LclVarDsc* varDsc = compiler->lvaGetDesc(lclVar); + assert(varTypeIsSIMD(varDsc->GetRegisterType())); + assert(!lclVar->IsMultiReg()); +#endif // DEBUG #ifdef FEATURE_SIMD genSIMDSplitReturn(op1, &retTypeDesc); #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b60353d8e8440..131d2e2258402 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7631,11 +7631,13 @@ class Compiler #if defined(TARGET_AMD64) static bool varTypeNeedsPartialCalleeSave(var_types type) { + assert(type != TYP_STRUCT); return (type == TYP_SIMD32); } #elif defined(TARGET_ARM64) static bool varTypeNeedsPartialCalleeSave(var_types type) { + assert(type != TYP_STRUCT); // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. return ((type == TYP_SIMD16) || (type == TYP_SIMD12)); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index b6542658b8657..36eaeb6869807 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -555,12 +555,7 @@ CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSave #endif // defined(TARGET_ARM64) #endif // DEBUG -#if defined(TARGET_XARCH) || defined(TARGET_ARM) CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 1) // Allow to enregister locals with struct type. -#else -CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 0) // Don't allow to enregister locals with struct type - // yet. -#endif #undef CONFIG_INTEGER #undef CONFIG_STRING diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index a267361f9143e..e247fc56e12cd 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -3493,13 +3493,21 @@ void Compiler::lvaSortByRefCount() { varDsc->lvTracked = 0; } - else if ((varDsc->lvType == TYP_STRUCT) && !varDsc->lvRegStruct && !compEnregStructLocals()) + else if (!varDsc->IsEnregisterableType()) { lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStruct)); } - else if (!varDsc->IsEnregisterableType()) + else if (varDsc->lvType == TYP_STRUCT) { - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStruct)); + if (!varDsc->lvRegStruct && !compEnregStructLocals()) + { + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStruct)); + } + else if (varDsc->lvIsMultiRegArgOrRet()) + { + // Prolog and return generators do not support SIMD<->general register moves. + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStructArg)); + } } else if (varDsc->lvType == TYP_STRUCT) { @@ -4170,7 +4178,7 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE // TODO-CQ: If the varType needs partial callee save, conservatively do not enregister // such variable. In future, need to enable enregisteration for such variables. - if (!varTypeNeedsPartialCalleeSave(varDsc->lvType)) + if (!varTypeNeedsPartialCalleeSave(varDsc->GetRegisterType())) #endif { varDsc->lvSingleDefRegCandidate = true; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index f382b1dcf4634..9491b2c077357 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1809,7 +1809,7 @@ void LinearScan::identifyCandidates() // Additionally, when we are generating code for a target with partial SIMD callee-save // (AVX on non-UNIX amd64 and 16-byte vectors on arm64), we keep a separate set of the // LargeVectorType vars. - if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType)) + if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->GetRegisterType())) { largeVectorVarCount++; VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex); @@ -6242,7 +6242,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree, } LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum; - assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType)); + assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->GetRegisterType())); // On Arm64, we must always have a register to save the upper half, // while on x86 we can spill directly to memory. @@ -6323,7 +6323,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, // lclVar as spilled). assert(lclVarReg != REG_NA); LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum; - assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType)); + assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->GetRegisterType())); GenTree* restoreLcl = nullptr; restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index da7cb15b7cd08..7eeddf4ef4f53 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1165,7 +1165,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType)) + if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->GetRegisterType())) { if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex)) { @@ -1511,7 +1511,40 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation cu for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end; listNode = listNode->Next()) { - if (Compiler::varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet())) + const GenTree* defNode = listNode->treeNode; + var_types regType = defNode->TypeGet(); + if (regType == TYP_STRUCT) + { + assert(defNode->OperIs(GT_LCL_VAR, GT_CALL)); + if (defNode->OperIs(GT_LCL_VAR)) + { + const GenTreeLclVar* lcl = defNode->AsLclVar(); + const LclVarDsc* varDsc = compiler->lvaGetDesc(lcl); + regType = varDsc->GetRegisterType(); + } + else + { + const GenTreeCall* call = defNode->AsCall(); + const CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd; + Compiler::structPassingKind howToReturnStruct; + regType = compiler->getReturnTypeForStruct(retClsHnd, call->GetUnmanagedCallConv(), &howToReturnStruct); + if (howToReturnStruct == Compiler::SPK_ByValueAsHfa) + { + regType = compiler->GetHfaType(retClsHnd); + } +#if defined(TARGET_ARM64) + else if (howToReturnStruct == Compiler::SPK_ByValue) + { + // TODO-Cleanup: add a new Compiler::SPK for this case. + // This is the case when 16-byte struct is returned as [x0, x1]. + // We don't need a partial callee save. + regType = TYP_LONG; + } +#endif // TARGET_ARM64 + } + assert((regType != TYP_STRUCT) && (regType != TYP_UNDEF)); + } + if (Compiler::varTypeNeedsPartialCalleeSave(regType)) { // In the rare case where such an interval is live across nested calls, we don't need to insert another. if (listNode->ref->getInterval()->recentRefPosition->refType != RefTypeUpperVectorSave) From 806edb4f783659680f116e6537d09a504fa05072 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 14 Jul 2021 17:27:26 -0700 Subject: [PATCH 4/4] fix bad merge and arm32 failures. --- src/coreclr/jit/lclvars.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index e247fc56e12cd..0a852a43d0deb 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -3508,18 +3508,6 @@ void Compiler::lvaSortByRefCount() // Prolog and return generators do not support SIMD<->general register moves. lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStructArg)); } - } - else if (varDsc->lvType == TYP_STRUCT) - { - if (!varDsc->lvRegStruct && !compEnregStructLocals()) - { - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStruct)); - } - else if (varDsc->lvIsMultiRegArgOrRet()) - { - // Prolog and return generators do not support SIMD<->general register moves. - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStructArg)); - } #if defined(TARGET_ARM) else if (varDsc->lvIsParam) {