diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index a38071f31a369..7fae2166e8e29 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -454,6 +454,11 @@ class LclVarDsc
     // before lvaMarkLocalVars: identifies ref type locals that can get type updates
     // after lvaMarkLocalVars: identifies locals that are suitable for optAddCopies

+    unsigned char lvEhWriteThruCandidate : 1; // variable has a single def and hence is a register candidate if
+                                              // it is an EH variable
+
+    unsigned char lvDisqualifyForEhWriteThru : 1; // tracks variables that are disqualified from register candidacy
+
 #if ASSERTION_PROP
     unsigned char lvDisqualify : 1;   // variable is no longer OK for add copy optimization
     unsigned char lvVolatileHint : 1; // hint for AssertionProp
@@ -7369,6 +7374,24 @@ class Compiler

     void raMarkStkVars();

+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+#if defined(TARGET_AMD64)
+    static bool varTypeNeedsPartialCalleeSave(var_types type)
+    {
+        return (type == TYP_SIMD32);
+    }
+#elif defined(TARGET_ARM64)
+    static bool varTypeNeedsPartialCalleeSave(var_types type)
+    {
+        // The ARM64 ABI FP callee-save registers only require the callee to save the lower 8 bytes;
+        // for SIMD types longer than 8 bytes, the caller is responsible for saving and restoring the upper bytes.
+        return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
+    }
+#else  // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
+#error("Unknown target architecture for FEATURE_SIMD")
+#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
 protected:

     // Some things are used by both LSRA and regpredict allocators.
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 8019c61e0496d..77d1b627a44a6 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -274,7 +274,7 @@ CONFIG_INTEGER(EnablePOPCNT, W("EnablePOPCNT"), 1) // Enable POPCNT
 CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0)
 #endif // !defined(TARGET_AMD64) && !defined(TARGET_X86)

-CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 0) // Enable the register allocator to support EH-write thru:
+CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 1) // Enable the register allocator to support EH-write thru:
                                                              // partial enregistration of vars exposed on EH boundaries
 CONFIG_INTEGER(EnableMultiRegLocals, W("EnableMultiRegLocals"), 1) // Enable the enregistration of locals that are
                                                                    // defined or used in a multireg context.
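Flipping the EnableEHWriteThru default to 1 turns on EH write-thru: a local that is live across an exception-handling boundary may now stay in a register, provided every definition is also written through to its stack home, so a handler always reads an up-to-date value. The old behavior should remain reachable by setting the config knob back to 0 externally (e.g. via the corresponding COMPlus_EnableEHWriteThru environment variable). A minimal C# sketch of the pattern this targets (illustrative only; method and variable names are invented):

```csharp
using System;

public static class WriteThruSketch
{
    public static void Run(Action work)
    {
        // 'start' is single-def and live into the finally block. Previously its
        // EH exposure forced it to live on the stack for its whole lifetime;
        // with EH write-thru its def is stored to the stack *and* kept in a
        // register, so the hot, non-exceptional path reads the register while
        // the handler reads the guaranteed-current stack copy.
        int start = Environment.TickCount;
        try
        {
            work();
        }
        finally
        {
            Console.WriteLine($"elapsed: {Environment.TickCount - start} ms");
        }
    }
}
```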
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index ea51815de2e03..ca6e34a083e1c 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -2614,14 +2614,16 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum)
         {
             noway_assert(lvaTable[i].lvIsStructField);
             lvaTable[i].lvLiveInOutOfHndlr = 1;
-            if (!lvaEnregEHVars)
+            // For now, only enregister an EH var if it is single-def and its refCnt > 1.
+            if (!lvaEnregEHVars || !lvaTable[i].lvEhWriteThruCandidate || lvaTable[i].lvRefCnt() <= 1)
             {
                 lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler));
             }
         }
     }

-    if (!lvaEnregEHVars)
+    // For now, only enregister an EH var if it is single-def and its refCnt > 1.
+    if (!lvaEnregEHVars || !varDsc->lvEhWriteThruCandidate || varDsc->lvRefCnt() <= 1)
     {
         lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler));
     }
@@ -4040,7 +4042,7 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt,

     /* Record if the variable has a single def or not */

-    if (!varDsc->lvDisqualify) // If this variable is already disqualified we can skip this
+    if (!varDsc->lvDisqualify) // If this variable is already disqualified, we can skip this
     {
         if (tree->gtFlags & GTF_VAR_DEF) // Is this is a def of our variable
         {
@@ -4075,6 +4077,34 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt,
                 BlockSetOps::AddElemD(this, varDsc->lvRefBlks, block->bbNum);
             }
         }
+
+        if (!varDsc->lvDisqualifyForEhWriteThru) // If this EH var is already disqualified, we can skip this
+        {
+            if (tree->gtFlags & GTF_VAR_DEF) // Is this a def of our variable?
+            {
+                bool bbInALoop             = (block->bbFlags & BBF_BACKWARD_JUMP) != 0;
+                bool bbIsReturn            = block->bbJumpKind == BBJ_RETURN;
+                bool needsExplicitZeroInit = fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn);
+
+                if (varDsc->lvEhWriteThruCandidate || needsExplicitZeroInit)
+                {
+                    varDsc->lvEhWriteThruCandidate     = false;
+                    varDsc->lvDisqualifyForEhWriteThru = true;
+                }
+                else
+                {
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+                    // TODO-CQ: If the varType needs partial callee save, conservatively do not enregister
+                    // such variables. In the future, we need to enable enregistration for such variables.
+                    if (!varTypeNeedsPartialCalleeSave(varDsc->lvType))
+#endif
+                    {
+                        varDsc->lvEhWriteThruCandidate = true;
+                    }
+                }
+            }
+        }
+
 #endif // ASSERTION_PROP

     bool allowStructs = false;
@@ -4178,6 +4208,8 @@ void Compiler::lvaMarkLocalVars(BasicBlock* block, bool isRecompute)

     Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
     {
+        // TODO: Stop passing isRecompute once we are sure that this assert is never hit.
+        assert(!m_isRecompute);
         m_compiler->lvaMarkLclRefs(*use, m_block, m_stmt, m_isRecompute);
         return WALK_CONTINUE;
     }
@@ -4437,7 +4469,13 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)

             // Set initial value for lvSingleDef for explicit and implicit
             // argument locals as they are "defined" on entry.
-            varDsc->lvSingleDef = varDsc->lvIsParam;
+            // However, if we are just recomputing the ref counts, retain the value
+            // that was set by past phases.
+            if (!isRecompute)
+            {
+                varDsc->lvSingleDef            = varDsc->lvIsParam;
+                varDsc->lvEhWriteThruCandidate = varDsc->lvIsParam;
+            }
         }

         // Remember current state of generic context use, and prepare
@@ -7194,7 +7232,7 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum)
     baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
 #endif

-    printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
+    printf("[%2s%1s%02XH] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
 }

 /*****************************************************************************
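The candidacy tracking above is deliberately conservative: the first def seen marks the local as a candidate, and a second def (or a def that needs explicit zero-init) disqualifies it. A C# illustration of which locals pass the single-def, refCnt > 1 filter (names invented for illustration):

```csharp
using System;

public static class CandidacySketch
{
    public static void Run(Action work)
    {
        // Single def, referenced again in the handler (refCnt > 1): 'started'
        // keeps lvEhWriteThruCandidate set and may be enregistered with
        // write-thru.
        int started = Environment.TickCount;

        // Two defs (the initializer plus the increment): the second def seen by
        // lvaMarkLclRefs sets lvDisqualifyForEhWriteThru, so 'attempts' stays
        // memory-resident across the EH boundary, as before this change.
        int attempts = 0;
        try
        {
            attempts++;
            work();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"attempt {attempts} failed after {Environment.TickCount - started} ms: {ex.Message}");
        }
    }
}
```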
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index 31127e89afd75..cd4a48bcdfe5d 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -1781,7 +1781,7 @@ void LinearScan::identifyCandidates()

             if (varDsc->lvLiveInOutOfHndlr)
             {
-                newInt->isWriteThru = true;
+                newInt->isWriteThru = varDsc->lvEhWriteThruCandidate;
                 setIntervalAsSpilled(newInt);
             }

@@ -1796,7 +1796,7 @@ void LinearScan::identifyCandidates()
             // Additionally, when we are generating code for a target with partial SIMD callee-save
             // (AVX on non-UNIX amd64 and 16-byte vectors on arm64), we keep a separate set of the
             // LargeVectorType vars.
-            if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
+            if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType))
             {
                 largeVectorVarCount++;
                 VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
@@ -5050,7 +5050,7 @@ void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
     }
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     // Ensure that we have no partially-spilled large vector locals.
-    assert(!varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
+    assert(!Compiler::varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
@@ -6922,7 +6922,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree,
     }

     LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
-    assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
+    assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType));

     // On Arm64, we must always have a register to save the upper half,
     // while on x86 we can spill directly to memory.
@@ -7003,7 +7003,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree,
     // lclVar as spilled).
     assert(lclVarReg != REG_NA);
     LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
-    assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
+    assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType));

     GenTree* restoreLcl = nullptr;
     restoreLcl          = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h
index 345cbb451f788..7e41701aca5e0 100644
--- a/src/coreclr/jit/lsra.h
+++ b/src/coreclr/jit/lsra.h
@@ -988,7 +988,7 @@ class LinearScan : public LinearScanInterface

     void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);

-    void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
+    void buildRefPositionsForNode(GenTree* tree, LsraLocation loc);

 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet);
@@ -1500,23 +1500,10 @@ class LinearScan : public LinearScanInterface

 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 #if defined(TARGET_AMD64)
-    static bool varTypeNeedsPartialCalleeSave(var_types type)
-    {
-        return (type == TYP_SIMD32);
-    }
     static const var_types LargeVectorSaveType = TYP_SIMD16;
 #elif defined(TARGET_ARM64)
-    static bool varTypeNeedsPartialCalleeSave(var_types type)
-    {
-        // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
-        // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
-        return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
-    }
-    static const var_types LargeVectorSaveType = TYP_DOUBLE;
-#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
-#error("Unknown target architecture for FEATURE_SIMD")
+    static const var_types LargeVectorSaveType = TYP_DOUBLE;
 #endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
-
     // Set of large vector (TYP_SIMD32 on AVX) variables.
     VARSET_TP largeVectorVars;
     // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
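Hoisting varTypeNeedsPartialCalleeSave from LinearScan into Compiler lets lclvars.cpp consult it when filtering EH write-thru candidates. The predicate exists because on these targets only part of a large vector register survives a call (the lower 128 bits of the callee-saved XMM registers on non-UNIX amd64 with AVX; the lower 8 bytes of the FP callee-saved registers on ARM64), so an enregistered large vector that is live across a call needs explicit upper-half save/restore RefPositions. A sketch of C# code that exercises this path (names invented; the TYP_SIMD32 case assumes AVX hardware):

```csharp
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

public static class UpperVectorSketch
{
    public static Vector256<float> Scale(Vector256<float> v, float factor)
    {
        if (!Avx.IsSupported)
        {
            return v;
        }

        // 'scaled' is TYP_SIMD32. If it is enregistered and live across the
        // call to Log(), the callee preserves only the lower 128 bits of a
        // callee-saved register, so LSRA inserts an upper-vector save before
        // the call and a restore after it (insertUpperVectorSave /
        // insertUpperVectorRestore above).
        Vector256<float> scaled = Avx.Multiply(Vector256.Create(factor), v);
        Log();
        return Avx.Add(scaled, v);
    }

    private static void Log() => Console.WriteLine("call with a live 256-bit vector");
}
```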
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index 88430f70fe771..7d21d34e4a68b 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -1160,7 +1160,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
         {
             LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-            if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
+            if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType))
             {
                 if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
                 {
@@ -1424,7 +1424,7 @@ void LinearScan::buildInternalRegisterUses()
 void LinearScan::makeUpperVectorInterval(unsigned varIndex)
 {
     Interval* lclVarInterval = getIntervalForLocalVar(varIndex);
-    assert(varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
+    assert(Compiler::varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
     Interval* newInt        = newInterval(LargeVectorSaveType);
     newInt->relatedInterval = lclVarInterval;
     newInt->isUpperVector   = true;
@@ -1506,7 +1506,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation cu
     for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
          listNode = listNode->Next())
     {
-        if (varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
+        if (Compiler::varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
         {
             // In the rare case where such an interval is live across nested calls, we don't need to insert another.
             if (listNode->ref->getInterval()->recentRefPosition->refType != RefTypeUpperVectorSave)
@@ -1637,10 +1637,9 @@ int LinearScan::ComputeAvailableSrcCount(GenTree* node)
 //
 // Arguments:
 //    tree       - The node for which we are building RefPositions
-//    block      - The BasicBlock in which the node resides
 //    currentLoc - The LsraLocation of the given node
 //
-void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc)
+void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc)
 {
     // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
     // GT_CLS_VAR nodes should have been eliminated by rationalizer.
@@ -2351,7 +2350,7 @@ void LinearScan::buildIntervals()
             node->SetRegNum(node->GetRegNum());
 #endif

-            buildRefPositionsForNode(node, block, currentLoc);
+            buildRefPositionsForNode(node, currentLoc);

 #ifdef DEBUG
             if (currentLoc > maxNodeLocation)
@@ -3232,7 +3231,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc,
         def->regOptional = true;
     }
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-    if (varTypeNeedsPartialCalleeSave(varDefInterval->registerType))
+    if (Compiler::varTypeNeedsPartialCalleeSave(varDefInterval->registerType))
     {
         varDefInterval->isPartiallySpilled = false;
     }
diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py
index 66beb3f1213d5..5f1edd95270ea 100755
--- a/src/coreclr/scripts/superpmi.py
+++ b/src/coreclr/scripts/superpmi.py
@@ -1346,7 +1346,7 @@ def save_repro_mc_files(temp_location, coreclr_args, repro_base_command_line):
             shutil.copy2(item, repro_location)

         logging.info("")
-        logging.info("Repro .mc files created for failures:")
+        logging.info("Created {} repro .mc file(s) for failures:".format(len(repro_files)))
         for item in repro_files:
             logging.info(item)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs
index 6d598e9929621..19b3d82686671 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs
@@ -25,17 +25,13 @@ internal static class AsyncMethodBuilderCore // debugger depends on this exact n
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.stateMachine);
             }

-            // enregistrer variables with 0 post-fix so they can be used in registers without EH forcing them to stack
-            // Capture references to Thread Contexts
-            Thread currentThread0 = Thread.CurrentThread;
-            Thread currentThread = currentThread0;
-            ExecutionContext? previousExecutionCtx0 = currentThread0._executionContext;
+            Thread currentThread = Thread.CurrentThread;

             // Store current ExecutionContext and SynchronizationContext as "previousXxx".
             // This allows us to restore them and undo any Context changes made in stateMachine.MoveNext
             // so that they won't "leak" out of the first await.
-            ExecutionContext? previousExecutionCtx = previousExecutionCtx0;
-            SynchronizationContext? previousSyncCtx = currentThread0._synchronizationContext;
+            ExecutionContext? previousExecutionCtx = currentThread._executionContext;
+            SynchronizationContext? previousSyncCtx = currentThread._synchronizationContext;

             try
             {
@@ -43,21 +39,17 @@ internal static class AsyncMethodBuilderCore // debugger depends on this exact n
             }
             finally
             {
-                // Re-enregistrer variables post EH with 1 post-fix so they can be used in registers rather than from stack
-                SynchronizationContext? previousSyncCtx1 = previousSyncCtx;
-                Thread currentThread1 = currentThread;
                 // The common case is that these have not changed, so avoid the cost of a write barrier if not needed.
-                if (previousSyncCtx1 != currentThread1._synchronizationContext)
+                if (previousSyncCtx != currentThread._synchronizationContext)
                 {
                     // Restore changed SynchronizationContext back to previous
-                    currentThread1._synchronizationContext = previousSyncCtx1;
+                    currentThread._synchronizationContext = previousSyncCtx;
                 }

-                ExecutionContext? previousExecutionCtx1 = previousExecutionCtx;
-                ExecutionContext? currentExecutionCtx1 = currentThread1._executionContext;
-                if (previousExecutionCtx1 != currentExecutionCtx1)
+                ExecutionContext? currentExecutionCtx = currentThread._executionContext;
+                if (previousExecutionCtx != currentExecutionCtx)
                 {
-                    ExecutionContext.RestoreChangedContextToThread(currentThread1, previousExecutionCtx1, currentExecutionCtx1);
+                    ExecutionContext.RestoreChangedContextToThread(currentThread, previousExecutionCtx, currentExecutionCtx);
                 }
             }
         }
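The deleted "0 post-fix" / "1 post-fix" locals here (and in ExecutionContext.RunInternal below) were a manual workaround: copies made before the try and inside the finally could live in registers even though the originals, being EH-exposed, could not. With EH write-thru the originals are single-def locals that stay enregistered and are merely written through to the stack, so the copies are no longer needed. A simplified, self-contained sketch of the pattern being retired (the real code uses internal Thread fields; these names are illustrative):

```csharp
using System.Threading;

public static class ManualEnregistrationSketch
{
    // Old shape: 'previousSyncCtx' crosses the try/finally, so pre-write-thru
    // JITs kept it on the stack; the extra 'previousSyncCtx1' copy inside the
    // finally pulled the value back into a register for the comparison.
    public static void RunOldShape(ThreadStart callback)
    {
        SynchronizationContext? previousSyncCtx = SynchronizationContext.Current;
        try
        {
            callback();
        }
        finally
        {
            SynchronizationContext? previousSyncCtx1 = previousSyncCtx; // manual re-enregistration
            if (SynchronizationContext.Current != previousSyncCtx1)
            {
                SynchronizationContext.SetSynchronizationContext(previousSyncCtx1);
            }
        }
    }

    // New shape: with EH write-thru, the single-def 'previousSyncCtx' can stay
    // in a register itself, so the copy is simply deleted.
    public static void RunNewShape(ThreadStart callback)
    {
        SynchronizationContext? previousSyncCtx = SynchronizationContext.Current;
        try
        {
            callback();
        }
        finally
        {
            if (SynchronizationContext.Current != previousSyncCtx)
            {
                SynchronizationContext.SetSynchronizationContext(previousSyncCtx);
            }
        }
    }
}
```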
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs
index e1950bd48587d..30b6f9e2561e3 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs
@@ -153,23 +153,18 @@ internal static void RunInternal(ExecutionContext? executionContext, ContextCall
             // Note: Manual enregistering may be addressed by "Exception Handling Write Through Optimization"
             // https://github.com/dotnet/runtime/blob/main/docs/design/features/eh-writethru.md

-            // Enregister variables with 0 post-fix so they can be used in registers without EH forcing them to stack
-            // Capture references to Thread Contexts
-            Thread currentThread0 = Thread.CurrentThread;
-            Thread currentThread = currentThread0;
-            ExecutionContext? previousExecutionCtx0 = currentThread0._executionContext;
+            // Enregister previousExecutionCtx0 so it can be used in a register without EH forcing it to the stack
+
+            Thread currentThread = Thread.CurrentThread;
+            ExecutionContext? previousExecutionCtx0 = currentThread._executionContext;
             if (previousExecutionCtx0 != null && previousExecutionCtx0.m_isDefault)
             {
                 // Default is a null ExecutionContext internally
                 previousExecutionCtx0 = null;
             }

-            // Store current ExecutionContext and SynchronizationContext as "previousXxx".
-            // This allows us to restore them and undo any Context changes made in callback.Invoke
-            // so that they won't "leak" back into caller.
-            // These variables will cross EH so be forced to stack
             ExecutionContext? previousExecutionCtx = previousExecutionCtx0;
-            SynchronizationContext? previousSyncCtx = currentThread0._synchronizationContext;
+            SynchronizationContext? previousSyncCtx = currentThread._synchronizationContext;

             if (executionContext != null && executionContext.m_isDefault)
             {
@@ -177,9 +172,9 @@ internal static void RunInternal(ExecutionContext? executionContext, ContextCall
                 executionContext = null;
             }

-            if (previousExecutionCtx0 != executionContext)
+            if (previousExecutionCtx != executionContext)
            {
-                RestoreChangedContextToThread(currentThread0, executionContext, previousExecutionCtx0);
+                RestoreChangedContextToThread(currentThread, executionContext, previousExecutionCtx);
             }

             ExceptionDispatchInfo? edi = null;
@@ -195,21 +190,17 @@ internal static void RunInternal(ExecutionContext? executionContext, ContextCall
                 edi = ExceptionDispatchInfo.Capture(ex);
             }

-            // Re-enregistrer variables post EH with 1 post-fix so they can be used in registers rather than from stack
-            SynchronizationContext? previousSyncCtx1 = previousSyncCtx;
-            Thread currentThread1 = currentThread;
             // The common case is that these have not changed, so avoid the cost of a write barrier if not needed.
-            if (currentThread1._synchronizationContext != previousSyncCtx1)
+            if (currentThread._synchronizationContext != previousSyncCtx)
             {
                 // Restore changed SynchronizationContext back to previous
-                currentThread1._synchronizationContext = previousSyncCtx1;
+                currentThread._synchronizationContext = previousSyncCtx;
             }

-            ExecutionContext? previousExecutionCtx1 = previousExecutionCtx;
-            ExecutionContext? currentExecutionCtx1 = currentThread1._executionContext;
-            if (currentExecutionCtx1 != previousExecutionCtx1)
+            ExecutionContext? currentExecutionCtx = currentThread._executionContext;
+            if (currentExecutionCtx != previousExecutionCtx)
             {
-                RestoreChangedContextToThread(currentThread1, previousExecutionCtx1, currentExecutionCtx1);
+                RestoreChangedContextToThread(currentThread, previousExecutionCtx, currentExecutionCtx);
             }

             // If exception was thrown by callback, rethrow it now original contexts are restored