Enable CSE for floating-point constants #44419

Merged: 2 commits, Feb 27, 2021
13 changes: 11 additions & 2 deletions src/coreclr/jit/compiler.h
@@ -6539,6 +6539,7 @@ class Compiler
ValueNum csdConstDefVN; // When we CSE similar constants, this is the ValueNumber that we use for the LclVar
// assignment
unsigned csdIndex; // 1..optCSECandidateCount
bool csdIsSharedConst; // true if this CSE is a shared const
bool csdLiveAcrossCall;

unsigned short csdDefCount; // definition count
@@ -6631,9 +6632,17 @@ class Compiler
return ((key & TARGET_SIGN_BIT) != 0);
}

static size_t Decode_Shared_Const_CSE_Value(size_t key)
// returns the encoded key
static size_t Encode_Shared_Const_CSE_Value(size_t key)
{
return (key & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
return TARGET_SIGN_BIT | (key >> CSE_CONST_SHARED_LOW_BITS);
}

// returns the original key
static size_t Decode_Shared_Const_CSE_Value(size_t enckey)
{
assert(Is_Shared_Const_CSE(enckey));
return (enckey & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
}

#endif // FEATURE_ANYCSE
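Taken together, the new pair defines the shared-constant key scheme: encoding drops the low CSE_CONST_SHARED_LOW_BITS bits of the constant and sets the target sign bit as the "shared constant" discriminator, so constants that differ only in their low bits hash to the same CSE candidate; decoding recovers the shared base value with those low bits cleared. Below is a minimal standalone sketch of the round-trip, using illustrative values for TARGET_SIGN_BIT and CSE_CONST_SHARED_LOW_BITS (the real definitions are target-dependent):

#include <cassert>
#include <cstddef>
#include <cstdio>

// Illustrative values only; the JIT defines these per target.
// The sketch assumes a 64-bit size_t.
static_assert(sizeof(size_t) == 8, "sketch assumes a 64-bit target");
static const size_t TARGET_SIGN_BIT = (size_t)1 << 63;
static const unsigned CSE_CONST_SHARED_LOW_BITS = 12;

static bool Is_Shared_Const_CSE(size_t key)
{
    return (key & TARGET_SIGN_BIT) != 0;
}

// Drop the low bits and set the sign bit as the "shared const" flag.
static size_t Encode_Shared_Const_CSE_Value(size_t key)
{
    return TARGET_SIGN_BIT | (key >> CSE_CONST_SHARED_LOW_BITS);
}

// Clear the flag and shift the upper bits back into place.
static size_t Decode_Shared_Const_CSE_Value(size_t enckey)
{
    assert(Is_Shared_Const_CSE(enckey));
    return (enckey & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
}

int main()
{
    // Two constants that differ only in their low 12 bits map to one key.
    size_t c1 = 0x2A000010;
    size_t c2 = 0x2A000FF8;
    size_t k1 = Encode_Shared_Const_CSE_Value(c1);
    size_t k2 = Encode_Shared_Const_CSE_Value(c2);
    assert(k1 == k2);

    // Decoding recovers the shared base: the constant with its low bits cleared.
    assert(Decode_Shared_Const_CSE_Value(k1) == 0x2A000000);
    printf("shared key 0x%zx, base 0x%zx\n", k1, Decode_Shared_Const_CSE_Value(k1));
    return 0;
}

With these example values any two constants in the same 4 KB-aligned block collapse to one candidate; how wide that window really is depends on the target's value of CSE_CONST_SHARED_LOW_BITS.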
37 changes: 34 additions & 3 deletions src/coreclr/jit/gentree.cpp
@@ -3316,7 +3316,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costSz = 2;
costEx = 1;
}
else if (codeGen->validImmForInstr(INS_mov, conVal) && codeGen->validImmForInstr(INS_mvn, conVal))
else if (codeGen->validImmForInstr(INS_mov, conVal) || codeGen->validImmForInstr(INS_mvn, conVal))
{
// Uses mov or mvn
costSz = 4;
@@ -3483,7 +3483,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
break;

case GT_CNS_DBL:
level = 0;
{
var_types targetType = tree->TypeGet();
level = 0;
#if defined(TARGET_XARCH)
/* We use fldz and fld1 to load 0.0 and 1.0, but all other */
/* floating point constants are loaded using an indirection */
if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) ||
@@ -3497,7 +3500,35 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costEx = IND_COST_EX;
costSz = 4;
}
break;
#elif defined(TARGET_ARM)
if (targetType == TYP_FLOAT)
{
costEx = 1 + 2;
costSz = 2 + 4;
}
else
{
assert(targetType == TYP_DOUBLE);
costEx = 1 + 4;
costSz = 2 + 8;
}
#elif defined(TARGET_ARM64)
if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) ||
emitter::emitIns_valid_imm_for_fmov(tree->AsDblCon()->gtDconVal))
{
costEx = 1;
costSz = 1;
}
else
{
costEx = IND_COST_EX;
costSz = 4;
}
#else
#error "Unknown TARGET"
#endif
}
break;

case GT_LCL_VAR:
level = 1;
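One detail worth calling out in the updated GT_CNS_DBL costing: the cheap paths on XARCH and ARM64 are gated on the constant's raw bit pattern (the *((__int64*)&...) == 0 test), not on a floating-point compare, so only positive zero qualifies. A small standalone sketch of that bit-level test, written with memcpy instead of the pointer cast purely for illustration:

#include <cassert>
#include <cstdint>
#include <cstring>

// True when the double's encoding is all zero bits, i.e. exactly +0.0.
// -0.0 compares equal to 0.0 but has the sign bit set, so it fails this test.
static bool IsAllBitsZero(double d)
{
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof(bits));
    return bits == 0;
}

int main()
{
    assert(IsAllBitsZero(0.0));
    assert(!IsAllBitsZero(-0.0));
    assert(!IsAllBitsZero(1.0));
    return 0;
}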
24 changes: 23 additions & 1 deletion src/coreclr/jit/morph.cpp
@@ -353,7 +353,29 @@ GenTree* Compiler::fgMorphCast(GenTree* tree)
}
else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
{
return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
oper = fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
Member: Is this part of this change or some other fix?

@briansull (Contributor, Author), Feb 26, 2021: It is required. CSE will assert, because without this we can make a CSE with mismatched types (TYP_FLOAT and TYP_DOUBLE).


// Since we don't have a Jit Helper that converts to a TYP_FLOAT
// we just use the one that converts to a TYP_DOUBLE
// and then add a cast to TYP_FLOAT
//
if ((dstType == TYP_FLOAT) && (oper->OperGet() == GT_CALL))
{
// Fix the return type to be TYP_DOUBLE
//
oper->gtType = TYP_DOUBLE;

// Add a Cast to TYP_FLOAT
//
tree = gtNewCastNode(TYP_FLOAT, oper, false, TYP_FLOAT);
INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);

return tree;
}
else
{
return oper;
}
}
#endif // TARGET_X86
else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
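The shape of the morphed tree can be mirrored in plain code: there is a helper that converts a long to a double but none that converts to a float, so the float case calls the double helper and then narrows the result. A minimal sketch of the equivalent computation, where ConvertLongToDouble is a hypothetical stand-in for the CORINFO_HELP_LNG2DBL helper (not its real signature):

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the CORINFO_HELP_LNG2DBL helper:
// signed 64-bit integer converted to double.
static double ConvertLongToDouble(int64_t value)
{
    return static_cast<double>(value);
}

// What the morphed tree computes for a long -> float cast on x86:
// a call to the long-to-double helper, then a cast of the result to float.
static float ConvertLongToFloat(int64_t value)
{
    return static_cast<float>(ConvertLongToDouble(value));
}

int main()
{
    assert(ConvertLongToFloat(1) == 1.0f);
    assert(ConvertLongToFloat(-42) == -42.0f);
    return 0;
}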
48 changes: 21 additions & 27 deletions src/coreclr/jit/optcse.cpp
@@ -411,6 +411,7 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
CSEdsc* hashDsc;
bool isIntConstHash = false;
bool enableSharedConstCSE = false;
bool isSharedConst = false;
int configValue = JitConfig.JitConstCSE();

#if defined(TARGET_ARM64)
@@ -485,19 +486,21 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
assert(vnStore->IsVNConstant(vnLibNorm));
key = vnStore->CoercedConstantValue<size_t>(vnLibNorm);

// We don't shared small offset constants when we require a reloc
// We don't share small offset constants when we require a reloc
//
Member: Maybe explain why not? Is it a legality thing or a profitability thing?

Contributor Author: It would be incorrect to CSE; you can't eliminate a reloc using a CSE and adding an offset to a different constant.
if (!tree->AsIntConCommon()->ImmedValNeedsReloc(this))
{
// Make constants that have the same upper bits use the same key

// Shift the key right by CSE_CONST_SHARED_LOW_BITS bits, this sets the upper bits to zero
key >>= CSE_CONST_SHARED_LOW_BITS;
key = Encode_Shared_Const_CSE_Value(key);
isSharedConst = true;
}
else
{
// Since we are using the sign bit as a discriminator
// we don't allow/expect it to be set when we need a reloc
//
assert((key & TARGET_SIGN_BIT) == 0);
}
assert((key & TARGET_SIGN_BIT) == 0);

// We use the sign bit of 'key' as the flag
// that we are hashing constants (with a shared offset)
key |= TARGET_SIGN_BIT;
}
else // Not a GT_COMMA or a GT_CNS_INT
{
@@ -543,6 +546,7 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
hashDsc->csdTreeLast = newElem;
hashDsc->csdStructHnd = NO_CLASS_HANDLE;

hashDsc->csdIsSharedConst = isSharedConst;
hashDsc->csdStructHndMismatch = false;

if (varTypeIsStruct(tree->gtType))
@@ -661,6 +665,7 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
hashDsc->csdConstDefValue = 0;
hashDsc->csdConstDefVN = vnStore->VNForNull(); // uninit value
hashDsc->csdIndex = 0;
hashDsc->csdIsSharedConst = false;
hashDsc->csdLiveAcrossCall = false;
hashDsc->csdDefCount = 0;
hashDsc->csdUseCount = 0;
@@ -2127,6 +2132,11 @@ class CSE_Heuristic
return m_Size;
}

bool IsSharedConst()
{
return m_CseDsc->csdIsSharedConst;
}

bool LiveAcrossCall()
{
return m_CseDsc->csdLiveAcrossCall;
@@ -2819,7 +2829,7 @@ class CSE_Heuristic
//
bool setRefCnt = true;
bool allSame = true;
bool isSharedConst = Compiler::Is_Shared_Const_CSE(dsc->csdHashKey);
bool isSharedConst = successfulCandidate->IsSharedConst();
ValueNum bestVN = ValueNumStore::NoVN;
bool bestIsDef = false;
ssize_t bestConstValue = 0;
@@ -2861,7 +2871,7 @@

ssize_t diff = curConstValue - bestConstValue;

// The ARM64 ldr addressing modes allow for a subtraction of up to 255
// The ARM addressing modes allow for a subtraction of up to 255
// so we will allow the diff to be up to -255 before replacing a CSE def
// This will minimize the number of extra subtract instructions.
//
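The rule the comment states (only let a smaller constant become the new base when it is at most 255 below the current one, so the other constants stay reachable as base plus a small offset) can be pictured with a schematic sketch; this illustrates the stated rule only, not the heuristic's actual code:

#include <cstddef>
#include <cstdio>
#include <vector>

// Schematic only: choose a base for a set of shared constants so each one
// can be rematerialized as base + offset. The base is only moved downward
// when the new candidate is within 255 below it, matching the limit the
// comment above gives for the addressing mode.
static ptrdiff_t PickBaseConstant(const std::vector<ptrdiff_t>& constants)
{
    ptrdiff_t best = constants[0];
    for (ptrdiff_t cur : constants)
    {
        ptrdiff_t diff = cur - best;
        if ((diff < 0) && (diff >= -255))
        {
            // Lower the base slightly so this value becomes base + 0
            // and the others remain reachable with small positive offsets.
            best = cur;
        }
    }
    return best;
}

int main()
{
    std::vector<ptrdiff_t> constants = {0x1000, 0x1010, 0x0FF8, 0x1020};
    ptrdiff_t base = PickBaseConstant(constants);
    for (ptrdiff_t c : constants)
    {
        printf("0x%zx = base + %td\n", (size_t)c, c - base);
    }
    return 0;
}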
@@ -3485,22 +3495,6 @@ bool Compiler::optIsCSEcandidate(GenTree* tree)
return false;
}

#ifdef TARGET_X86
Member: Do we need diffs for x86/x64 as well?

Contributor Author:
X64 Asm Diffs:

Top file regressions (bytes):
         239 : Microsoft.Diagnostics.Tracing.TraceEvent.dasm (0.01% of base)
         123 : Microsoft.VisualBasic.Core.dasm (0.03% of base)
          99 : System.Speech.dasm (0.03% of base)
          75 : System.Runtime.Numerics.dasm (0.10% of base)
          46 : System.Drawing.Common.dasm (0.01% of base)
           7 : System.Net.Http.WinHttpHandler.dasm (0.01% of base)
           4 : System.Private.Xml.dasm (0.00% of base)

Top file improvements (bytes):
         -61 : System.Private.CoreLib.dasm (-0.00% of base)
          -4 : System.Data.Common.dasm (-0.00% of base)
          -4 : Microsoft.CSharp.dasm (-0.00% of base)
          -1 : System.Net.Http.dasm (-0.00% of base)

X86 Asm Diffs:

Top file regressions (bytes):
          31 : System.Drawing.Primitives.dasm (0.10% of base)
          24 : Microsoft.VisualBasic.Core.dasm (0.01% of base)
          18 : System.ComponentModel.TypeConverter.dasm (0.01% of base)
          11 : System.Drawing.Common.dasm (0.00% of base)
          11 : System.Linq.Expressions.dasm (0.00% of base)
          10 : Microsoft.CodeAnalysis.CSharp.dasm (0.00% of base)
           4 : System.Private.DataContractSerialization.dasm (0.00% of base)

Top file improvements (bytes):
         -41 : System.Speech.dasm (-0.01% of base)
         -27 : System.Private.CoreLib.dasm (-0.00% of base)
         -13 : System.Data.Common.dasm (-0.00% of base)
          -9 : FSharp.Core.dasm (-0.00% of base)
          -3 : Microsoft.Diagnostics.Tracing.TraceEvent.dasm (-0.00% of base)
          -2 : System.Linq.Parallel.dasm (-0.00% of base)

if (type == TYP_FLOAT)
{
// TODO-X86-CQ: Revisit this
// Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
return false;
}
#else
if (oper == GT_CNS_DBL)
{
// TODO-CQ: Revisit this
// Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles
return false;
}
#endif

unsigned cost;
if (compCodeOpt() == SMALL_CODE)
{