Skip to content

Commit

Permalink
Enable CSE for floating-point constants (#44419)
Browse files Browse the repository at this point in the history
* Implement CSE for constants of type float and double
Update the gtCostEx and gtCostSz values of  float and double constants for Arm and Arm64
Fix the gtCosts for 16-bit ARM32 immediate values

* Code Review feedback
  • Loading branch information
briansull committed Feb 27, 2021
1 parent 535aaf8 commit 70676fd
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 33 deletions.
13 changes: 11 additions & 2 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6539,6 +6539,7 @@ class Compiler
ValueNum csdConstDefVN; // When we CSE similar constants, this is the ValueNumber that we use for the LclVar
// assignment
unsigned csdIndex; // 1..optCSECandidateCount
bool csdIsSharedConst; // true if this CSE is a shared const
bool csdLiveAcrossCall;

unsigned short csdDefCount; // definition count
Expand Down Expand Up @@ -6631,9 +6632,17 @@ class Compiler
return ((key & TARGET_SIGN_BIT) != 0);
}

static size_t Decode_Shared_Const_CSE_Value(size_t key)
// returns the encoded key
static size_t Encode_Shared_Const_CSE_Value(size_t key)
{
return (key & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
return TARGET_SIGN_BIT | (key >> CSE_CONST_SHARED_LOW_BITS);
}

// returns the orginal key
static size_t Decode_Shared_Const_CSE_Value(size_t enckey)
{
assert(Is_Shared_Const_CSE(enckey));
return (enckey & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
}

#endif // FEATURE_ANYCSE
Expand Down
37 changes: 34 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3316,7 +3316,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costSz = 2;
costEx = 1;
}
else if (codeGen->validImmForInstr(INS_mov, conVal) && codeGen->validImmForInstr(INS_mvn, conVal))
else if (codeGen->validImmForInstr(INS_mov, conVal) || codeGen->validImmForInstr(INS_mvn, conVal))
{
// Uses mov or mvn
costSz = 4;
Expand Down Expand Up @@ -3483,7 +3483,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
break;

case GT_CNS_DBL:
level = 0;
{
var_types targetType = tree->TypeGet();
level = 0;
#if defined(TARGET_XARCH)
/* We use fldz and fld1 to load 0.0 and 1.0, but all other */
/* floating point constants are loaded using an indirection */
if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) ||
Expand All @@ -3497,7 +3500,35 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costEx = IND_COST_EX;
costSz = 4;
}
break;
#elif defined(TARGET_ARM)
if (targetType == TYP_FLOAT)
{
costEx = 1 + 2;
costSz = 2 + 4;
}
else
{
assert(targetType == TYP_DOUBLE);
costEx = 1 + 4;
costSz = 2 + 8;
}
#elif defined(TARGET_ARM64)
if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) ||
emitter::emitIns_valid_imm_for_fmov(tree->AsDblCon()->gtDconVal))
{
costEx = 1;
costSz = 1;
}
else
{
costEx = IND_COST_EX;
costSz = 4;
}
#else
#error "Unknown TARGET"
#endif
}
break;

case GT_LCL_VAR:
level = 1;
Expand Down
24 changes: 23 additions & 1 deletion src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,29 @@ GenTree* Compiler::fgMorphCast(GenTree* tree)
}
else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
{
return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
oper = fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);

// Since we don't have a Jit Helper that converts to a TYP_FLOAT
// we just use the one that converts to a TYP_DOUBLE
// and then add a cast to TYP_FLOAT
//
if ((dstType == TYP_FLOAT) && (oper->OperGet() == GT_CALL))
{
// Fix the return type to be TYP_DOUBLE
//
oper->gtType = TYP_DOUBLE;

// Add a Cast to TYP_FLOAT
//
tree = gtNewCastNode(TYP_FLOAT, oper, false, TYP_FLOAT);
INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);

return tree;
}
else
{
return oper;
}
}
#endif // TARGET_X86
else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
Expand Down
48 changes: 21 additions & 27 deletions src/coreclr/jit/optcse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
CSEdsc* hashDsc;
bool isIntConstHash = false;
bool enableSharedConstCSE = false;
bool isSharedConst = false;
int configValue = JitConfig.JitConstCSE();

#if defined(TARGET_ARM64)
Expand Down Expand Up @@ -485,19 +486,21 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
assert(vnStore->IsVNConstant(vnLibNorm));
key = vnStore->CoercedConstantValue<size_t>(vnLibNorm);

// We don't shared small offset constants when we require a reloc
// We don't share small offset constants when we require a reloc
//
if (!tree->AsIntConCommon()->ImmedValNeedsReloc(this))
{
// Make constants that have the same upper bits use the same key

// Shift the key right by CSE_CONST_SHARED_LOW_BITS bits, this sets the upper bits to zero
key >>= CSE_CONST_SHARED_LOW_BITS;
key = Encode_Shared_Const_CSE_Value(key);
isSharedConst = true;
}
else
{
// Since we are using the sign bit as a discriminator
// we don't allow/expect it to be set when we need a reloc
//
assert((key & TARGET_SIGN_BIT) == 0);
}
assert((key & TARGET_SIGN_BIT) == 0);

// We use the sign bit of 'key' as the flag
// that we are hashing constants (with a shared offset)
key |= TARGET_SIGN_BIT;
}
else // Not a GT_COMMA or a GT_CNS_INT
{
Expand Down Expand Up @@ -543,6 +546,7 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
hashDsc->csdTreeLast = newElem;
hashDsc->csdStructHnd = NO_CLASS_HANDLE;

hashDsc->csdIsSharedConst = isSharedConst;
hashDsc->csdStructHndMismatch = false;

if (varTypeIsStruct(tree->gtType))
Expand Down Expand Up @@ -661,6 +665,7 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
hashDsc->csdConstDefValue = 0;
hashDsc->csdConstDefVN = vnStore->VNForNull(); // uninit value
hashDsc->csdIndex = 0;
hashDsc->csdIsSharedConst = false;
hashDsc->csdLiveAcrossCall = false;
hashDsc->csdDefCount = 0;
hashDsc->csdUseCount = 0;
Expand Down Expand Up @@ -2127,6 +2132,11 @@ class CSE_Heuristic
return m_Size;
}

bool IsSharedConst()
{
return m_CseDsc->csdIsSharedConst;
}

bool LiveAcrossCall()
{
return m_CseDsc->csdLiveAcrossCall;
Expand Down Expand Up @@ -2819,7 +2829,7 @@ class CSE_Heuristic
//
bool setRefCnt = true;
bool allSame = true;
bool isSharedConst = Compiler::Is_Shared_Const_CSE(dsc->csdHashKey);
bool isSharedConst = successfulCandidate->IsSharedConst();
ValueNum bestVN = ValueNumStore::NoVN;
bool bestIsDef = false;
ssize_t bestConstValue = 0;
Expand Down Expand Up @@ -2861,7 +2871,7 @@ class CSE_Heuristic

ssize_t diff = curConstValue - bestConstValue;

// The ARM64 ldr addressing modes allow for a subtraction of up to 255
// The ARM addressing modes allow for a subtraction of up to 255
// so we will allow the diff to be up to -255 before replacing a CSE def
// This will minimize the number of extra subtract instructions.
//
Expand Down Expand Up @@ -3485,22 +3495,6 @@ bool Compiler::optIsCSEcandidate(GenTree* tree)
return false;
}

#ifdef TARGET_X86
if (type == TYP_FLOAT)
{
// TODO-X86-CQ: Revisit this
// Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
return false;
}
#else
if (oper == GT_CNS_DBL)
{
// TODO-CQ: Revisit this
// Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles
return false;
}
#endif

unsigned cost;
if (compCodeOpt() == SMALL_CODE)
{
Expand Down

0 comments on commit 70676fd

Please sign in to comment.