From 1b7ff390d4ebb2a5086b6f815325fc96af113c63 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 9 Jun 2022 17:10:26 +0200 Subject: [PATCH 1/3] JIT: Optimize expansion of indir cell addresses for CFG On ARM64 it is expensive to materialize the address of an indirection cell since that requires multiple instructions. This is particular a problem when CFG is enabled where validation + call uses the indir cell twice. This changes the CFG logic to create a local in this case. We also were forcing the indir cell argument node into the register. This is not ideal for this particular case, so remove this logic and let LSRA deal with it. This also required fixing a bug where LSRA on ARM32 would not properly kill the callee-saved indir cell register for R2R calls. Fix #65076 --- src/coreclr/jit/lower.cpp | 56 ++++++++++++++++++++++++++++++++--- src/coreclr/jit/lsrabuild.cpp | 14 ++++++++- src/coreclr/jit/morph.cpp | 14 ++------- src/coreclr/jit/valuenum.cpp | 15 ++++------ 4 files changed, 73 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 98bd11f0fd2d3..741da8e6c4432 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2283,10 +2283,58 @@ void Lowering::LowerCFGCall(GenTreeCall* call) } GenTree* callTarget = call->gtCallType == CT_INDIRECT ? call->gtCallAddr : call->gtControlExpr; - if ((callTarget == nullptr) || callTarget->IsIntegralConst()) + if (callTarget == nullptr) { - // This is a direct call, no CFG check is necessary. - return; + assert((call->gtCallType != CT_INDIRECT) && (!call->IsVirtual() || call->IsVirtualStubRelativeIndir())); + if (!call->IsVirtual()) + { + // Direct call with stashed address + return; + } + + // This is a VSD call with the call target being null because we are + // supposed to load it from the indir cell. Due to CFG we will need + // this address twice, and at least on ARM64 we do not want to + // materialize the constant both times. + CallArg* indirCellArg = call->gtArgs.FindWellKnownArg(WellKnownArg::VirtualStubCell); + assert((indirCellArg != nullptr) && indirCellArg->GetNode()->OperIs(GT_PUTARG_REG)); + + GenTreeOp* putArgNode = indirCellArg->GetNode()->AsOp(); + LIR::Use indirCellArgUse(BlockRange(), &putArgNode->gtOp1, putArgNode); + + // On non-xarch, we create a local even for constants. On xarch cloning + // the constant is better since it can be contained in the load below. + bool cloneConsts = false; +#ifdef TARGET_XARCH + cloneConsts = true; +#endif + + GenTree* indirCellClone; + + if (indirCellArgUse.Def()->OperIs(GT_LCL_VAR) || (cloneConsts && indirCellArgUse.Def()->IsCnsIntOrI())) + { + indirCellClone = comp->gtClone(indirCellArgUse.Def()); + } + else + { + unsigned newLcl = indirCellArgUse.ReplaceWithLclVar(comp); + indirCellClone = comp->gtNewLclvNode(newLcl, TYP_I_IMPL); + } + + callTarget = Ind(indirCellClone); + LIR::Range controlExprRange = LIR::SeqTree(comp, callTarget); + ContainCheckRange(controlExprRange); + + BlockRange().InsertBefore(call, std::move(controlExprRange)); + call->gtControlExpr = callTarget; + } + else + { + if (callTarget->IsIntegralConst()) + { + // This is a direct call, no CFG check is necessary. + return; + } } CFGCallKind cfgKind = call->GetCFGCallKind(); @@ -5127,7 +5175,7 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) // Skip inserting the indirection node to load the address that is already // computed in the VSD stub arg register as a hidden parameter. Instead during the // codegen, just load the call target from there. - shouldOptimizeVirtualStubCall = !comp->opts.IsCFGEnabled(); + shouldOptimizeVirtualStubCall = true; #endif if (!shouldOptimizeVirtualStubCall) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 9c29ca4c1dc23..922422ac8c37d 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -893,8 +893,20 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) killMask &= ~RBM_FLT_CALLEE_TRASH; } #ifdef TARGET_ARM - if (call->IsVirtualStub()) + // Indirection cell arg may go in callee saved register on ARM. + // Technically these may not be killed by the call, but we have special + // handling for arguments to keep their registers busy until they are + // killed expecting to see this kill at the call site. + WellKnownArg indirCellKind = call->GetIndirectionCellArgKind(); + if (indirCellKind != WellKnownArg::None) { +#ifdef DEBUG + // Assume the different indir cells use the same register to avoid + // having to find the arg in release builds, but assert it here. + CallArg* indirCellArg = call->gtArgs.FindWellKnownArg(indirCellKind); + assert((indirCellArg != nullptr) && + (indirCellArg->AbiInfo.GetRegNum() == compiler->virtualStubParamInfo->GetReg())); +#endif killMask |= compiler->virtualStubParamInfo->GetRegMask(); } #else // !TARGET_ARM diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index ec5d602d5eb64..fae3127d8fd06 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2110,18 +2110,11 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call #ifdef DEBUG indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; #endif - indirectCellAddress->SetRegNum(REG_R2R_INDIRECT_PARAM); -#ifdef TARGET_ARM - // Issue #xxxx : Don't attempt to CSE this constant on ARM32 - // - // This constant has specific register requirements, and LSRA doesn't currently correctly - // handle them when the value is in a CSE'd local. - indirectCellAddress->SetDoNotCSE(); -#endif // TARGET_ARM // Push the stub address onto the list of arguments. - InsertAfterThisOrFirst(comp, - NewCallArg::Primitive(indirectCellAddress).WellKnown(WellKnownArg::R2RIndirectionCell)); + NewCallArg indirCellAddrArg = + NewCallArg::Primitive(indirectCellAddress).WellKnown(WellKnownArg::R2RIndirectionCell); + InsertAfterThisOrFirst(comp, indirCellAddrArg); } #endif @@ -7938,7 +7931,6 @@ GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call) #endif } assert(stubAddrArg != nullptr); - stubAddrArg->SetRegNum(virtualStubParamInfo->GetReg()); return stubAddrArg; } diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index be7fc14611f0f..d58f996d6f50b 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -9650,19 +9650,14 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN vnpUniq.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet())); } -#if defined(FEATURE_READYTORUN) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) - if (call->IsR2RRelativeIndir()) + if (call->GetIndirectionCellArgKind() != WellKnownArg::None) { -#ifdef DEBUG - GenTree* indirectCellAddress = args->GetArgByIndex(0)->GetNode(); - assert(indirectCellAddress->IsCnsIntOrI() && indirectCellAddress->GetRegNum() == REG_R2R_INDIRECT_PARAM); -#endif // DEBUG - - // For ARM indirectCellAddress is consumed by the call itself, so it should have added as an implicit argument - // in morph. So we do not need to use EntryPointAddrAsArg0, because arg0 is already an entry point addr. + // If we are VN'ing a call with indirection cell arg (e.g. because this + // is a helper in a R2R compilation) then morph should already have + // added this arg, so we do not need to use EntryPointAddrAsArg0 + // because the indirection cell itself allows us to disambiguate. useEntryPointAddrAsArg0 = false; } -#endif // FEATURE_READYTORUN && (TARGET_ARMARCH || TARGET_LOONGARCH64) CallArg* curArg = &*args->Args().begin(); if (nArgs == 0) From 8fc46d3047ae2ba43b7a1c79f9ed02521d8fba10 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 9 Jun 2022 21:03:15 +0200 Subject: [PATCH 2/3] Fix GetIndirectionCellArgKind for delegates These are actual invocations of the Invoke method so they have the normal flags/entrypoints set, but we do not add indirection cells for them. --- src/coreclr/jit/gentree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index b9ba82532a554..ad163c17ee727 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5308,7 +5308,7 @@ struct GenTreeCall final : public GenTree #if defined(TARGET_ARMARCH) // For ARM architectures, we always use an indirection cell for R2R calls. - if (IsR2RRelativeIndir()) + if (IsR2RRelativeIndir() && !IsDelegateInvoke()) { return WellKnownArg::R2RIndirectionCell; } From 6ef03b4d112b8915a41acf54cdd10a1c7bd3f3b9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 10 Jun 2022 12:28:48 +0200 Subject: [PATCH 3/3] Revert ARM LSRA changes These cause some larger than expected regressions so will open a separate issue for it. --- src/coreclr/jit/lsrabuild.cpp | 14 +------------- src/coreclr/jit/morph.cpp | 11 ++++++++++- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 922422ac8c37d..9c29ca4c1dc23 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -893,20 +893,8 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) killMask &= ~RBM_FLT_CALLEE_TRASH; } #ifdef TARGET_ARM - // Indirection cell arg may go in callee saved register on ARM. - // Technically these may not be killed by the call, but we have special - // handling for arguments to keep their registers busy until they are - // killed expecting to see this kill at the call site. - WellKnownArg indirCellKind = call->GetIndirectionCellArgKind(); - if (indirCellKind != WellKnownArg::None) + if (call->IsVirtualStub()) { -#ifdef DEBUG - // Assume the different indir cells use the same register to avoid - // having to find the arg in release builds, but assert it here. - CallArg* indirCellArg = call->gtArgs.FindWellKnownArg(indirCellKind); - assert((indirCellArg != nullptr) && - (indirCellArg->AbiInfo.GetRegNum() == compiler->virtualStubParamInfo->GetReg())); -#endif killMask |= compiler->virtualStubParamInfo->GetRegMask(); } #else // !TARGET_ARM diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index fae3127d8fd06..6f2d990141c80 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2111,6 +2111,15 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; #endif +#ifdef TARGET_ARM + // TODO-ARM: We currently do not properly kill this register in LSRA + // (see getKillSetForCall which does so only for VSD calls). + // We should be able to remove these two workarounds once we do so, + // however when this was tried there were significant regressions. + indirectCellAddress->SetRegNum(REG_R2R_INDIRECT_PARAM); + indirectCellAddress->SetDoNotCSE(); +#endif + // Push the stub address onto the list of arguments. NewCallArg indirCellAddrArg = NewCallArg::Primitive(indirectCellAddress).WellKnown(WellKnownArg::R2RIndirectionCell); @@ -7911,7 +7920,7 @@ void Compiler::fgMorphTailCallViaJitHelper(GenTreeCall* call) // call - a call that needs virtual stub dispatching. // // Return Value: -// addr tree with set resister requirements. +// addr tree // GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call) {