do not enregister simdtype
kunalspathak committed Mar 4, 2021
1 parent 24b0af6 commit ba57957
Showing 5 changed files with 40 additions and 42 deletions.
18 changes: 18 additions & 0 deletions src/coreclr/jit/compiler.h
@@ -7373,6 +7373,24 @@ class Compiler

void raMarkStkVars();

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
#if defined(TARGET_AMD64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
return (type == TYP_SIMD32);
}
#elif defined(TARGET_ARM64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
// The ARM64 ABI only requires the callee to save the lower 8 bytes of the FP callee-save registers.
// For SIMD types longer than 8 bytes, the caller is responsible for saving and restoring the upper bytes.
return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
}
#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
#error("Unknown target architecture for FEATURE_SIMD")
#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE

protected:
// Some things are used by both LSRA and regpredict allocators.

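Why exactly these types per target: on non-UNIX AMD64 the callee-saved XMM registers preserve only their low 16 bytes, so only 32-byte AVX vectors need extra handling, while on ARM64 the callee-saved V8-V15 preserve only their low 8 bytes. A minimal, self-contained sketch of the rule (the enum values and helper name below are stand-ins, not the JIT's definitions):

#include <cstdio>

enum var_types { TYP_DOUBLE, TYP_SIMD8, TYP_SIMD12, TYP_SIMD16, TYP_SIMD32 };

static bool needsPartialCalleeSave(var_types type, bool isArm64)
{
    if (isArm64)
    {
        // ARM64: callee-saved V8-V15 preserve only their low 8 bytes, so any
        // vector wider than 8 bytes needs its upper bytes saved at call sites.
        return (type == TYP_SIMD16) || (type == TYP_SIMD12);
    }
    // AMD64: callee-saved XMM registers preserve only their low 16 bytes, so
    // only 32-byte (AVX) vectors need an upper-half save.
    return (type == TYP_SIMD32);
}

int main()
{
    printf("SIMD16/ARM64: %d\n", needsPartialCalleeSave(TYP_SIMD16, true));  // 1
    printf("SIMD16/AMD64: %d\n", needsPartialCalleeSave(TYP_SIMD16, false)); // 0
    printf("SIMD32/AMD64: %d\n", needsPartialCalleeSave(TYP_SIMD32, false)); // 1
}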
13 changes: 10 additions & 3 deletions src/coreclr/jit/lclvars.cpp
@@ -4089,12 +4089,19 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt,
if ((varDsc->lvEhWriteThruCandidate == true) || (needsExplicitZeroInit == true) ||
(tree->gtFlags & GTF_COLON_COND) || (tree->gtFlags & GTF_VAR_USEASG))
{
varDsc->lvEhWriteThruCandidate = false;
varDsc->lvDisqualifyForEhWriteThru = true;
}
else
{
varDsc->lvEhWriteThruCandidate = true;
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// TODO-CQ: If the varType needs a partial callee save, conservatively do not enregister
// such variables. In the future, we should enable enregistration for them.
if (!varTypeNeedsPartialCalleeSave(varDsc->lvType))
#endif
{
varDsc->lvEhWriteThruCandidate = true;
}
}
}
}
@@ -4467,7 +4474,7 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
// that was set by past phases.
if (!isRecompute)
{
varDsc->lvSingleDef = varDsc->lvIsParam;
varDsc->lvEhWriteThruCandidate = varDsc->lvIsParam;
}
}
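Taken together, the two hunks above implement "single def => candidate" with a sticky disqualification, and now carve out large vectors. A condensed, self-contained model of that flow (the struct, the boolean stand-in for the varType check, and the outer disqualification guard that the hunk elides are all assumptions; the real code lives in Compiler::lvaMarkLclRefs):

#include <cstdio>

struct LclVarDsc
{
    bool lvEhWriteThruCandidate     = false;
    bool lvDisqualifyForEhWriteThru = false;
    bool needsPartialCalleeSave     = false; // stand-in for varTypeNeedsPartialCalleeSave(lvType)
};

static void recordDef(LclVarDsc& varDsc, bool needsExplicitZeroInit)
{
    if (varDsc.lvDisqualifyForEhWriteThru) // assumed outer guard, elided from the hunk
    {
        return;
    }
    if (varDsc.lvEhWriteThruCandidate || needsExplicitZeroInit)
    {
        // A second def (or another disqualifier) permanently disqualifies the local.
        varDsc.lvEhWriteThruCandidate     = false;
        varDsc.lvDisqualifyForEhWriteThru = true;
    }
    else if (!varDsc.needsPartialCalleeSave)
    {
        // First def, and not a large vector: eligible for EH write-thru.
        varDsc.lvEhWriteThruCandidate = true;
    }
}

int main()
{
    LclVarDsc v;
    recordDef(v, false);
    printf("candidate after 1 def: %d\n", v.lvEhWriteThruCandidate);  // 1
    recordDef(v, false);
    printf("candidate after 2 defs: %d (disqualified: %d)\n",
           v.lvEhWriteThruCandidate, v.lvDisqualifyForEhWriteThru);   // 0 (1)
}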
23 changes: 5 additions & 18 deletions src/coreclr/jit/lsra.cpp
@@ -874,7 +874,6 @@ void LinearScan::setBlockSequence()
}
}


if (!block->isBBCallAlwaysPairTail() &&
(predBlock->hasEHBoundaryOut() || predBlock->isBBCallAlwaysPairTail()))
{
@@ -1797,7 +1796,7 @@ void LinearScan::identifyCandidates()
// Additionally, when we are generating code for a target with partial SIMD callee-save
// (AVX on non-UNIX amd64 and 16-byte vectors on arm64), we keep a separate set of the
// LargeVectorType vars.
if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType))
{
largeVectorVarCount++;
VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
@@ -5051,7 +5050,7 @@ void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
}
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// Ensure that we have no partially-spilled large vector locals.
assert(!varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
assert(!Compiler::varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
@@ -6923,7 +6922,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree,
}

LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType));

// On Arm64, we must always have a register to save the upper half,
// while on x86 we can spill directly to memory.
@@ -7004,7 +7003,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree,
// lclVar as spilled).
assert(lclVarReg != REG_NA);
LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType));

GenTree* restoreLcl = nullptr;
restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
@@ -7069,7 +7068,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree,
}
else
{
assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS || block->bbJumpKind == BBJ_THROW);
blockRange.InsertAtEnd(LIR::SeqTree(compiler, simdNode));
}
}
@@ -7950,18 +7949,6 @@ void LinearScan::insertMove(
// These block kinds don't have a branch at the end.
assert((lastNode == nullptr) || (!lastNode->OperIsConditionalJump() &&
!lastNode->OperIs(GT_SWITCH_TABLE, GT_SWITCH, GT_RETURN, GT_RETFILT)));

/*if (lastNode != nullptr && lastNode->OperIs(GT_STORE_LCL_VAR))
{
regNumber prevToReg = lastNode->GetReg();
GenTree* op1 = lastNode->gtGetOp1();
regNumber prevFromReg = op1->GetReg();
if (op1->TypeGet() == varDsc->TypeGet() && (fromReg == prevToReg) && (toReg == prevFromReg))
{
JITDUMP("Skipping redundant resoltion");
return;
}
}*/
blockRange.InsertAtEnd(std::move(treeRange));
}
}
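Beyond qualifying the moved predicate with Compiler:: and deleting the commented-out resolution check, the one behavioral tweak in this file is the relaxed assert in insertUpperVectorRestore. A tiny sketch of the rule it now encodes (the enum subset and helper name are assumptions):

#include <cassert>

enum BBjumpKinds { BBJ_NONE, BBJ_ALWAYS, BBJ_THROW, BBJ_COND, BBJ_RETURN };

// The restore node may be appended at the very end of a block only for block
// kinds with no branch at the end; this commit adds BBJ_THROW to that set.
static bool canAppendRestoreAtBlockEnd(BBjumpKinds kind)
{
    return (kind == BBJ_NONE) || (kind == BBJ_ALWAYS) || (kind == BBJ_THROW);
}

int main()
{
    assert(canAppendRestoreAtBlockEnd(BBJ_THROW));  // newly allowed
    assert(!canAppendRestoreAtBlockEnd(BBJ_COND));  // branching blocks take the other path
}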
15 changes: 1 addition & 14 deletions src/coreclr/jit/lsra.h
@@ -988,7 +988,7 @@ class LinearScan : public LinearScanInterface

void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);

void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
void buildRefPositionsForNode(GenTree* tree, LsraLocation loc);

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet);
@@ -1500,23 +1500,10 @@ class LinearScan : public LinearScanInterface

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
#if defined(TARGET_AMD64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
return (type == TYP_SIMD32);
}
static const var_types LargeVectorSaveType = TYP_SIMD16;
#elif defined(TARGET_ARM64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
// ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
// For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
}
static const var_types LargeVectorSaveType = TYP_DOUBLE;
#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
#error("Unknown target architecture for FEATURE_SIMD")
#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)

// Set of large vector (TYP_SIMD32 on AVX) variables.
VARSET_TP largeVectorVars;
// Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
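The predicate moves to Compiler so that lclvars.cpp can call it, while LargeVectorSaveType stays here. Its per-target value follows from the sizes involved: the save temp only needs to hold the bytes the callee will not preserve. A compile-time check of that arithmetic (the helper is illustrative, not JIT code; sizes come from the comments above):

constexpr int upperHalfBytes(int vectorBytes, int calleePreservedBytes)
{
    return vectorBytes - calleePreservedBytes;
}

static_assert(upperHalfBytes(32, 16) == 16, "AMD64: upper half of a TYP_SIMD32 fits a TYP_SIMD16 temp");
static_assert(upperHalfBytes(16, 8) == 8, "ARM64: upper half of a TYP_SIMD16 fits a TYP_DOUBLE temp");

int main() { return 0; }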
13 changes: 6 additions & 7 deletions src/coreclr/jit/lsrabuild.cpp
@@ -1160,7 +1160,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
{
LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType))
{
if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
{
@@ -1424,7 +1424,7 @@ void LinearScan::buildInternalRegisterUses()
void LinearScan::makeUpperVectorInterval(unsigned varIndex)
{
Interval* lclVarInterval = getIntervalForLocalVar(varIndex);
assert(varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
assert(Compiler::varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
Interval* newInt = newInterval(LargeVectorSaveType);
newInt->relatedInterval = lclVarInterval;
newInt->isUpperVector = true;
@@ -1506,7 +1506,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation cu
for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
listNode = listNode->Next())
{
if (varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
if (Compiler::varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
{
// In the rare case where such an interval is live across nested calls, we don't need to insert another.
if (listNode->ref->getInterval()->recentRefPosition->refType != RefTypeUpperVectorSave)
@@ -1637,10 +1637,9 @@ int LinearScan::ComputeAvailableSrcCount(GenTree* node)
//
// Arguments:
// tree - The node for which we are building RefPositions
// block - The BasicBlock in which the node resides
// currentLoc - The LsraLocation of the given node
//
void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc)
void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc)
{
// The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
// GT_CLS_VAR nodes should have been eliminated by rationalizer.
@@ -2351,7 +2350,7 @@ void LinearScan::buildIntervals()
node->SetRegNum(node->GetRegNum());
#endif

buildRefPositionsForNode(node, block, currentLoc);
buildRefPositionsForNode(node, currentLoc);

#ifdef DEBUG
if (currentLoc > maxNodeLocation)
@@ -3232,7 +3231,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc,
def->regOptional = true;
}
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (varTypeNeedsPartialCalleeSave(varDefInterval->registerType))
if (Compiler::varTypeNeedsPartialCalleeSave(varDefInterval->registerType))
{
varDefInterval->isPartiallySpilled = false;
}
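One subtlety in buildUpperVectorSaveRefPositions is the dedup for vectors live across nested calls: a save is only added if the interval's most recent ref position is not already an upper-vector save. A toy model of that guard (RefType and Interval are heavily simplified stand-ins for the LSRA data structures):

#include <cstdio>

enum RefType { RefTypeDef, RefTypeUpperVectorSave };

struct Interval
{
    RefType recentRefType = RefTypeDef; // stand-in for recentRefPosition->refType
};

static void buildUpperVectorSaveAtCall(Interval& interval)
{
    if (interval.recentRefType != RefTypeUpperVectorSave)
    {
        printf("inserting RefTypeUpperVectorSave\n");
        interval.recentRefType = RefTypeUpperVectorSave;
    }
    else
    {
        printf("already saved across an enclosing call; skipping\n");
    }
}

int main()
{
    Interval largeVector;
    buildUpperVectorSaveAtCall(largeVector); // outer call: save inserted
    buildUpperVectorSaveAtCall(largeVector); // nested call: no second save
}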
