Enregister EH var that are single def (#47307)
* Enable EhWriteThru for SingleDef

* If EhWriteThru is enabled, DoNotEnregister if variable is not singleDef

* Revert code in ExecutionContext.RunInternal

* Revert code in AsyncMethodBuilderCore.Start()

* Make sure we do not reset lvSingleDef

* Consistent display of frame offset

misc change in superpmi.py

* Use lvEHWriteThruCandidate

* Do not enregister an EH Var that has a single use

* Do not enregister SIMD types

* Add missing comments

* jit format

* Revert an unintended change

* jit format

* Add missing comments
kunalspathak committed Mar 9, 2021
1 parent ccf2de0 commit 24b0228
Showing 9 changed files with 101 additions and 71 deletions.
23 changes: 23 additions & 0 deletions src/coreclr/jit/compiler.h
@@ -454,6 +454,11 @@ class LclVarDsc
// before lvaMarkLocalVars: identifies ref type locals that can get type updates
// after lvaMarkLocalVars: identifies locals that are suitable for optAddCopies

unsigned char lvEhWriteThruCandidate : 1; // variable has a single def and hence is a register candidate
// if it is an EH variable

unsigned char lvDisqualifyForEhWriteThru : 1; // tracks variables that are disqualified from register candidacy

#if ASSERTION_PROP
unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization
unsigned char lvVolatileHint : 1; // hint for AssertionProp
@@ -7369,6 +7374,24 @@ class Compiler

void raMarkStkVars();

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
#if defined(TARGET_AMD64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
return (type == TYP_SIMD32);
}
#elif defined(TARGET_ARM64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
// The ARM64 ABI requires the callee to save only the lower 8 bytes of the FP callee-saved registers.
// For SIMD types wider than 8 bytes, the caller is responsible for saving and restoring the upper bytes.
return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
}
#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
#error("Unknown target architecture for FEATURE_SIMD")
#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE

protected:
// Some things are used by both LSRA and regpredict allocators.

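A standalone model of what the new helper encodes (a sketch: the enum below is a stand-in for the JIT's var_types, not the real definition):

#include <cassert>

// Stand-in for the JIT's var_types enum; only the SIMD members matter here.
enum var_types
{
    TYP_SIMD8,
    TYP_SIMD12,
    TYP_SIMD16,
    TYP_SIMD32
};

// AMD64: callee-saved XMM registers preserve only 16 bytes, so 32-byte
// vectors need their upper half saved around calls.
static bool needsPartialCalleeSaveAmd64(var_types t)
{
    return t == TYP_SIMD32;
}

// ARM64: only the low 8 bytes of the FP callee-saved registers are preserved,
// so 12- and 16-byte vectors need their upper bytes saved around calls.
static bool needsPartialCalleeSaveArm64(var_types t)
{
    return (t == TYP_SIMD16) || (t == TYP_SIMD12);
}

int main()
{
    assert(!needsPartialCalleeSaveAmd64(TYP_SIMD16));
    assert(needsPartialCalleeSaveArm64(TYP_SIMD12));
    return 0;
}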
2 changes: 1 addition & 1 deletion src/coreclr/jit/jitconfigvalues.h
@@ -274,7 +274,7 @@ CONFIG_INTEGER(EnablePOPCNT, W("EnablePOPCNT"), 1) // Enable POPCNT
CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0)
#endif // !defined(TARGET_AMD64) && !defined(TARGET_X86)

CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 0) // Enable the register allocator to support EH-write thru:
CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 1) // Enable the register allocator to support EH-write thru:
// partial enregistration of vars exposed on EH boundaries
CONFIG_INTEGER(EnableMultiRegLocals, W("EnableMultiRegLocals"), 1) // Enable the enregistration of locals that are
// defined or used in a multireg context.
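With the default flipped from 0 to 1, EH write-thru is now on unless explicitly disabled. Assuming the standard CONFIG_INTEGER plumbing (not shown in this diff), the previous behavior can be restored locally by setting the COMPlus_EnableEHWriteThru environment variable to 0.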
48 changes: 43 additions & 5 deletions src/coreclr/jit/lclvars.cpp
@@ -2614,14 +2614,16 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum)
{
noway_assert(lvaTable[i].lvIsStructField);
lvaTable[i].lvLiveInOutOfHndlr = 1;
if (!lvaEnregEHVars)
// For now, only enregister an EH Var if it is single-def and its refCnt > 1.
if (!lvaEnregEHVars || !lvaTable[i].lvEhWriteThruCandidate || lvaTable[i].lvRefCnt() <= 1)
{
lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler));
}
}
}

if (!lvaEnregEHVars)
// For now, only enregister an EH Var if it is single-def and its refCnt > 1.
if (!lvaEnregEHVars || !varDsc->lvEhWriteThruCandidate || varDsc->lvRefCnt() <= 1)
{
lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler));
}
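The gating condition above, restated as a standalone sketch (the struct and helper are hypothetical; the real check is written inline):

// Model of the check in lvaSetVarLiveInOutOfHandler: an EH-live var remains
// a register candidate only when EH write-thru is enabled, the var has a
// single def, and it is referenced more than once.
struct LclVarModel
{
    bool     lvEhWriteThruCandidate;
    unsigned refCnt;
};

static bool isEhEnregCandidate(bool lvaEnregEHVars, const LclVarModel& dsc)
{
    return lvaEnregEHVars && dsc.lvEhWriteThruCandidate && (dsc.refCnt > 1);
}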
@@ -4040,7 +4042,7 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt,

/* Record if the variable has a single def or not */

if (!varDsc->lvDisqualify) // If this variable is already disqualified we can skip this
if (!varDsc->lvDisqualify) // If this variable is already disqualified, we can skip this
{
if (tree->gtFlags & GTF_VAR_DEF) // Is this a def of our variable?
{
@@ -4075,6 +4077,34 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt,
BlockSetOps::AddElemD(this, varDsc->lvRefBlks, block->bbNum);
}
}

if (!varDsc->lvDisqualifyForEhWriteThru) // If this EH var is already disqualified, we can skip this
{
if (tree->gtFlags & GTF_VAR_DEF) // Is this a def of our variable?
{
bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0;
bool bbIsReturn = block->bbJumpKind == BBJ_RETURN;
bool needsExplicitZeroInit = fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn);

if (varDsc->lvEhWriteThruCandidate || needsExplicitZeroInit)
{
varDsc->lvEhWriteThruCandidate = false;
varDsc->lvDisqualifyForEhWriteThru = true;
}
else
{
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// TODO-CQ: If the varType needs partial callee save, conservatively do not enregister
// such variables. In the future, we need to enable enregistration for such variables.
if (!varTypeNeedsPartialCalleeSave(varDsc->lvType))
#endif
{
varDsc->lvEhWriteThruCandidate = true;
}
}
}
}

#endif // ASSERTION_PROP

bool allowStructs = false;
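The def tracking added above is a small two-flag state machine; a simplified standalone model (ignoring the zero-init and partial-callee-save carve-outs):

// First def seen: the var becomes an EH write-thru candidate.
// Any later def: candidacy is permanently revoked.
struct EhCandidateState
{
    bool candidate    = false; // models lvEhWriteThruCandidate
    bool disqualified = false; // models lvDisqualifyForEhWriteThru
};

static void recordDef(EhCandidateState& s)
{
    if (s.disqualified)
    {
        return; // already disqualified; nothing more to track
    }
    if (s.candidate)
    {
        s.candidate    = false; // second def: no longer single-def
        s.disqualified = true;
    }
    else
    {
        s.candidate = true; // first def
    }
}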
@@ -4178,6 +4208,8 @@ void Compiler::lvaMarkLocalVars(BasicBlock* block, bool isRecompute)

Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
// TODO: Stop passing isRecompute once we are sure that this assert is never hit.
assert(!m_isRecompute);
m_compiler->lvaMarkLclRefs(*use, m_block, m_stmt, m_isRecompute);
return WALK_CONTINUE;
}
@@ -4437,7 +4469,13 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)

// Set initial value for lvSingleDef for explicit and implicit
// argument locals as they are "defined" on entry.
varDsc->lvSingleDef = varDsc->lvIsParam;
// However, if we are just recomputing the ref counts, retain the value
// that was set by past phases.
if (!isRecompute)
{
varDsc->lvSingleDef = varDsc->lvIsParam;
varDsc->lvEhWriteThruCandidate = varDsc->lvIsParam;
}
}

// Remember current state of generic context use, and prepare
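Retaining the flags here matters because a recompute is not expected to re-run lvaMarkLclRefs (the new assert in PreOrderVisit above checks exactly this), so the single-def information gathered during the initial marking must survive ref-count recomputation.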
@@ -7194,7 +7232,7 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum)
baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
#endif

printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
printf("[%2s%1s%02XH] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
}

/*****************************************************************************
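A runnable demo of the new format (hypothetical register name and offset; the old format string would have printed [rbp-0x10]):

#include <cstdio>

int main()
{
    int offset = -0x10; // hypothetical frame offset
    // Same format string as above: hex magnitude with an 'H' suffix.
    printf("[%2s%1s%02XH]\n", "rbp", (offset < 0 ? "-" : "+"),
           (offset < 0 ? -offset : offset));
    // prints: [rbp-10H]
    return 0;
}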
10 changes: 5 additions & 5 deletions src/coreclr/jit/lsra.cpp
@@ -1781,7 +1781,7 @@ void LinearScan::identifyCandidates()

if (varDsc->lvLiveInOutOfHndlr)
{
newInt->isWriteThru = true;
newInt->isWriteThru = varDsc->lvEhWriteThruCandidate;
setIntervalAsSpilled(newInt);
}
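Note the semantic shift here: every interval live in or out of a handler was previously marked write-thru unconditionally; now the flag mirrors the single-def candidacy computed in lclvars.cpp, while non-candidates have already been excluded from enregistration by lvaSetVarLiveInOutOfHandler (see above).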

@@ -1796,7 +1796,7 @@
// Additionally, when we are generating code for a target with partial SIMD callee-save
// (AVX on non-UNIX amd64 and 16-byte vectors on arm64), we keep a separate set of the
// LargeVectorType vars.
if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType))
{
largeVectorVarCount++;
VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
@@ -5050,7 +5050,7 @@ void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
}
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// Ensure that we have no partially-spilled large vector locals.
assert(!varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
assert(!Compiler::varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
@@ -6922,7 +6922,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree,
}

LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType));

// On Arm64, we must always have a register to save the upper half,
// while on x86 we can spill directly to memory.
@@ -7003,7 +7003,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree,
// lclVar as spilled).
assert(lclVarReg != REG_NA);
LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType));

GenTree* restoreLcl = nullptr;
restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
17 changes: 2 additions & 15 deletions src/coreclr/jit/lsra.h
@@ -985,7 +985,7 @@ class LinearScan : public LinearScanInterface

void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);

void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
void buildRefPositionsForNode(GenTree* tree, LsraLocation loc);

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet);
@@ -1497,23 +1497,10 @@

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
#if defined(TARGET_AMD64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
return (type == TYP_SIMD32);
}
static const var_types LargeVectorSaveType = TYP_SIMD16;
#elif defined(TARGET_ARM64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
// ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
// For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
}
static const var_types LargeVectorSaveType = TYP_DOUBLE;
#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
#error("Unknown target architecture for FEATURE_SIMD")
static const var_types LargeVectorSaveType = TYP_DOUBLE;
#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)

// Set of large vector (TYP_SIMD32 on AVX) variables.
VARSET_TP largeVectorVars;
// Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
13 changes: 6 additions & 7 deletions src/coreclr/jit/lsrabuild.cpp
@@ -1160,7 +1160,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc
{
LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType))
{
if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
{
@@ -1424,7 +1424,7 @@ void LinearScan::buildInternalRegisterUses()
void LinearScan::makeUpperVectorInterval(unsigned varIndex)
{
Interval* lclVarInterval = getIntervalForLocalVar(varIndex);
assert(varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
assert(Compiler::varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
Interval* newInt = newInterval(LargeVectorSaveType);
newInt->relatedInterval = lclVarInterval;
newInt->isUpperVector = true;
@@ -1506,7 +1506,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc
for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
listNode = listNode->Next())
{
if (varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
if (Compiler::varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
{
// In the rare case where such an interval is live across nested calls, we don't need to insert another.
if (listNode->ref->getInterval()->recentRefPosition->refType != RefTypeUpperVectorSave)
@@ -1637,10 +1637,9 @@ int LinearScan::ComputeAvailableSrcCount(GenTree* node)
//
// Arguments:
// tree - The node for which we are building RefPositions
// block - The BasicBlock in which the node resides
// currentLoc - The LsraLocation of the given node
//
void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc)
void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc)
{
// The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
// GT_CLS_VAR nodes should have been eliminated by rationalizer.
@@ -2351,7 +2350,7 @@ void LinearScan::buildIntervals()
node->SetRegNum(node->GetRegNum());
#endif

buildRefPositionsForNode(node, block, currentLoc);
buildRefPositionsForNode(node, currentLoc);

#ifdef DEBUG
if (currentLoc > maxNodeLocation)
@@ -3232,7 +3231,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc,
def->regOptional = true;
}
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (varTypeNeedsPartialCalleeSave(varDefInterval->registerType))
if (Compiler::varTypeNeedsPartialCalleeSave(varDefInterval->registerType))
{
varDefInterval->isPartiallySpilled = false;
}
2 changes: 1 addition & 1 deletion src/coreclr/scripts/superpmi.py
@@ -1346,7 +1346,7 @@ def save_repro_mc_files(temp_location, coreclr_args, repro_base_command_line):
shutil.copy2(item, repro_location)

logging.info("")
logging.info("Repro .mc files created for failures:")
logging.info("Repro {} .mc file(s) created for failures:".format(len(repro_files)))
for item in repro_files:
logging.info(item)

src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncMethodBuilderCore.cs
@@ -25,39 +25,31 @@ internal static class AsyncMethodBuilderCore // debugger depends on this exact name
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.stateMachine);
}

// enregistrer variables with 0 post-fix so they can be used in registers without EH forcing them to stack
// Capture references to Thread Contexts
Thread currentThread0 = Thread.CurrentThread;
Thread currentThread = currentThread0;
ExecutionContext? previousExecutionCtx0 = currentThread0._executionContext;
Thread currentThread = Thread.CurrentThread;

// Store current ExecutionContext and SynchronizationContext as "previousXxx".
// This allows us to restore them and undo any Context changes made in stateMachine.MoveNext
// so that they won't "leak" out of the first await.
ExecutionContext? previousExecutionCtx = previousExecutionCtx0;
SynchronizationContext? previousSyncCtx = currentThread0._synchronizationContext;
ExecutionContext? previousExecutionCtx = currentThread._executionContext;
SynchronizationContext? previousSyncCtx = currentThread._synchronizationContext;

try
{
stateMachine.MoveNext();
}
finally
{
// Re-enregistrer variables post EH with 1 post-fix so they can be used in registers rather than from stack
SynchronizationContext? previousSyncCtx1 = previousSyncCtx;
Thread currentThread1 = currentThread;
// The common case is that these have not changed, so avoid the cost of a write barrier if not needed.
if (previousSyncCtx1 != currentThread1._synchronizationContext)
if (previousSyncCtx != currentThread._synchronizationContext)
{
// Restore changed SynchronizationContext back to previous
currentThread1._synchronizationContext = previousSyncCtx1;
currentThread._synchronizationContext = previousSyncCtx;
}

ExecutionContext? previousExecutionCtx1 = previousExecutionCtx;
ExecutionContext? currentExecutionCtx1 = currentThread1._executionContext;
if (previousExecutionCtx1 != currentExecutionCtx1)
ExecutionContext? currentExecutionCtx = currentThread._executionContext;
if (previousExecutionCtx != currentExecutionCtx)
{
ExecutionContext.RestoreChangedContextToThread(currentThread1, previousExecutionCtx1, currentExecutionCtx1);
ExecutionContext.RestoreChangedContextToThread(currentThread, previousExecutionCtx, currentExecutionCtx);
}
}
}
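With EH write-thru on by default, the JIT can keep these single-def locals enregistered across the try/finally on its own, so the hand-written 0- and 1-suffixed register copies (and the comments explaining them) are reverted to the straightforward form.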
[Diff for one remaining changed file is not shown here; per the commit message it is the revert of the workaround in ExecutionContext.RunInternal.]
