Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 7ba1bf9

Browse files
committed
Adding SSE4.1 intrinsic support for Round, Ceiling, and Floor.
1 parent 2f3fa55 commit 7ba1bf9

File tree

14 files changed

+713
-86
lines changed

14 files changed

+713
-86
lines changed

src/jit/codegen.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ class CodeGen : public CodeGenInterface
6565

6666
// Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask"
6767
void genSSE2BitwiseOp(GenTreePtr treeNode);
68+
69+
// Generates SSE41 code for the given tree as a round operation
// (a roundss/roundsd instruction for the CORINFO_INTRINSIC_Round, _Ceiling, and _Floor
// intrinsics). The caller is responsible for calling genProduceReg() afterwards.
70+
void genSSE41RoundOp(GenTreeOp* treeNode);
6871
#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
6972

7073
void genPrepForCompiler();

src/jit/codegenxarch.cpp

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7331,6 +7331,173 @@ void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
73317331
inst_RV_RV(ins, targetReg, operandReg, targetType);
73327332
}
73337333

7334+
//-----------------------------------------------------------------------------------------
7335+
// genSSE41RoundOp - generate SSE41 code for the given tree as a round operation
7336+
//
7337+
// Arguments:
7338+
// treeNode - tree node
7339+
//
7340+
// Return value:
7341+
// None
7342+
//
7343+
// Assumptions:
7344+
// i) SSE4.1 is supported by the underlying hardware
7345+
// ii) treeNode oper is a GT_INTRINSIC
7346+
// iii) treeNode type is a floating point type
7347+
// iv) treeNode is not used from memory
7348+
// v) tree oper is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor
7349+
// vi) caller of this routine needs to call genProduceReg()
7350+
void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode)
7351+
{
7352+
// i) SSE4.1 is supported by the underlying hardware
7353+
assert(compiler->compSupports(InstructionSet_SSE41));
7354+
7355+
// ii) treeNode oper is a GT_INTRINSIC
7356+
assert(treeNode->OperGet() == GT_INTRINSIC);
7357+
7358+
GenTree* srcNode = treeNode->gtGetOp1();
7359+
7360+
// iii) treeNode type is floating point type
7361+
assert(varTypeIsFloating(srcNode));
7362+
assert(srcNode->TypeGet() == treeNode->TypeGet());
7363+
7364+
// iv) treeNode is not used from memory
7365+
assert(!treeNode->isUsedFromMemory());
7366+
7367+
genConsumeOperands(treeNode);
7368+
7369+
instruction ins = (treeNode->TypeGet() == TYP_FLOAT) ? INS_roundss : INS_roundsd;
7370+
emitAttr size = emitTypeSize(treeNode);
7371+
7372+
regNumber dstReg = treeNode->gtRegNum;
7373+
7374+
unsigned ival = 0;
7375+
7376+
// v) tree oper is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor
7377+
switch (treeNode->gtIntrinsic.gtIntrinsicId)
7378+
{
7379+
case CORINFO_INTRINSIC_Round:
7380+
ival = 4;
7381+
break;
7382+
7383+
case CORINFO_INTRINSIC_Ceiling:
7384+
ival = 10;
7385+
break;
7386+
7387+
case CORINFO_INTRINSIC_Floor:
7388+
ival = 9;
7389+
break;
7390+
7391+
default:
7392+
ins = INS_invalid;
7393+
assert(!"genSSE41RoundOp: unsupported intrinsic");
7394+
unreached();
7395+
}
7396+
7397+
if (srcNode->isContained() || srcNode->isUsedFromSpillTemp())
7398+
{
7399+
emitter* emit = getEmitter();
7400+
7401+
TempDsc* tmpDsc = nullptr;
7402+
unsigned varNum = BAD_VAR_NUM;
7403+
unsigned offset = (unsigned)-1;
7404+
7405+
if (srcNode->isUsedFromSpillTemp())
7406+
{
7407+
assert(srcNode->IsRegOptional());
7408+
7409+
tmpDsc = getSpillTempDsc(srcNode);
7410+
varNum = tmpDsc->tdTempNum();
7411+
offset = 0;
7412+
7413+
compiler->tmpRlsTemp(tmpDsc);
7414+
}
7415+
else if (srcNode->isIndir())
7416+
{
7417+
GenTreeIndir* memIndir = srcNode->AsIndir();
7418+
GenTree* memBase = memIndir->gtOp1;
7419+
7420+
switch (memBase->OperGet())
7421+
{
7422+
case GT_LCL_VAR_ADDR:
7423+
{
7424+
varNum = memBase->AsLclVarCommon()->GetLclNum();
7425+
offset = 0;
7426+
7427+
// Ensure that all the GenTreeIndir values are set to their defaults.
7428+
assert(memBase->gtRegNum == REG_NA);
7429+
assert(!memIndir->HasIndex());
7430+
assert(memIndir->Scale() == 1);
7431+
assert(memIndir->Offset() == 0);
7432+
7433+
break;
7434+
}
7435+
7436+
case GT_CLS_VAR_ADDR:
7437+
{
7438+
emit->emitIns_R_C_I(ins, size, dstReg, memBase->gtClsVar.gtClsVarHnd, 0, ival);
7439+
return;
7440+
}
7441+
7442+
default:
7443+
{
7444+
emit->emitIns_R_A_I(ins, size, dstReg, memIndir, ival);
7445+
return;
7446+
}
7447+
}
7448+
}
7449+
else
7450+
{
7451+
switch (srcNode->OperGet())
7452+
{
7453+
case GT_CNS_DBL:
7454+
{
7455+
GenTreeDblCon* dblConst = srcNode->AsDblCon();
7456+
CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst->gtDconVal, emitTypeSize(dblConst));
7457+
7458+
emit->emitIns_R_C_I(ins, size, dstReg, hnd, 0, ival);
7459+
return;
7460+
}
7461+
7462+
case GT_LCL_FLD:
7463+
{
7464+
GenTreeLclFld* lclField = srcNode->AsLclFld();
7465+
7466+
varNum = lclField->GetLclNum();
7467+
offset = lclField->gtLclFld.gtLclOffs;
7468+
break;
7469+
}
7470+
7471+
case GT_LCL_VAR:
7472+
{
7473+
assert(srcNode->IsRegOptional() ||
7474+
!compiler->lvaTable[srcNode->gtLclVar.gtLclNum].lvIsRegCandidate());
7475+
7476+
varNum = srcNode->AsLclVar()->GetLclNum();
7477+
offset = 0;
7478+
break;
7479+
}
7480+
7481+
default:
7482+
unreached();
7483+
break;
7484+
}
7485+
}
7486+
7487+
// Ensure we got a good varNum and offset.
7488+
// We also need to check for `tmpDsc != nullptr` since spill temp numbers
7489+
// are negative and start with -1, which also happens to be BAD_VAR_NUM.
7490+
assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
7491+
assert(offset != (unsigned)-1);
7492+
7493+
emit->emitIns_R_S_I(ins, size, dstReg, varNum, offset, ival);
7494+
}
7495+
else
7496+
{
7497+
inst_RV_RV_IV(ins, size, dstReg, srcNode->gtRegNum, ival);
7498+
}
7499+
}
7500+
73347501
//---------------------------------------------------------------------
73357502
// genIntrinsic - generate code for a given intrinsic
73367503
//
@@ -7361,6 +7528,12 @@ void CodeGen::genIntrinsic(GenTreePtr treeNode)
73617528
genSSE2BitwiseOp(treeNode);
73627529
break;
73637530

7531+
case CORINFO_INTRINSIC_Round:
7532+
case CORINFO_INTRINSIC_Ceiling:
7533+
case CORINFO_INTRINSIC_Floor:
7534+
genSSE41RoundOp(treeNode->AsOp());
7535+
break;
7536+
73647537
default:
73657538
assert(!"genIntrinsic: Unsupported intrinsic");
73667539
unreached();

src/jit/compiler.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3222,10 +3222,10 @@ class Compiler
32223222
unsigned* typeSize,
32233223
bool forReturn);
32243224

3225-
static bool IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId);
3226-
static bool IsTargetIntrinsic(CorInfoIntrinsics intrinsicId);
3227-
static bool IsMathIntrinsic(CorInfoIntrinsics intrinsicId);
3228-
static bool IsMathIntrinsic(GenTreePtr tree);
3225+
bool IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId);
3226+
bool IsTargetIntrinsic(CorInfoIntrinsics intrinsicId);
3227+
bool IsMathIntrinsic(CorInfoIntrinsics intrinsicId);
3228+
bool IsMathIntrinsic(GenTreePtr tree);
32293229

32303230
private:
32313231
//----------------- Importing the method ----------------------------------

src/jit/emitfmtsxarch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ IF_DEF(MRD_OFF, IS_GM_RD, DSP) // offset mem
122122
IF_DEF(RRD_MRD, IS_GM_RD|IS_R1_RD, DSP) // read reg , read [mem]
123123
IF_DEF(RWR_MRD, IS_GM_RD|IS_R1_WR, DSP) // write reg , read [mem]
124124
IF_DEF(RRW_MRD, IS_GM_RD|IS_R1_RW, DSP) // r/w reg , read [mem]
125+
IF_DEF(RRW_MRD_CNS, IS_GM_RD|IS_R1_RW, DSP_CNS) // r/w reg , read [mem], const
125126

126127
IF_DEF(RWR_RRD_MRD, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP) // write reg , read reg2 , read [mem]
127128
IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR, DSP) // write reg , offset mem
@@ -147,6 +148,7 @@ IF_DEF(SRW, IS_SF_RW, NONE) // r/w [stk]
147148
IF_DEF(RRD_SRD, IS_SF_RD|IS_R1_RD, NONE) // read reg , read [stk]
148149
IF_DEF(RWR_SRD, IS_SF_RD|IS_R1_WR, NONE) // write reg , read [stk]
149150
IF_DEF(RRW_SRD, IS_SF_RD|IS_R1_RW, NONE) // r/w reg , read [stk]
151+
IF_DEF(RRW_SRD_CNS, IS_SF_RD|IS_R1_RW, CNS ) // r/w reg , read [stk], const
150152

151153
IF_DEF(RWR_RRD_SRD, IS_SF_RD|IS_R1_WR|IS_R2_RD, NONE) // write reg , read reg2, read [stk]
152154

@@ -172,6 +174,7 @@ IF_DEF(ARW, IS_AM_RW, AMD ) // r/w [adr]
172174
IF_DEF(RRD_ARD, IS_AM_RD|IS_R1_RD, AMD ) // read reg , read [adr]
173175
IF_DEF(RWR_ARD, IS_AM_RD|IS_R1_WR, AMD ) // write reg , read [adr]
174176
IF_DEF(RRW_ARD, IS_AM_RD|IS_R1_RW, AMD ) // r/w reg , read [adr]
177+
IF_DEF(RRW_ARD_CNS, IS_AM_RD|IS_R1_RW, AMD_CNS) // r/w reg , read [adr], const
175178

176179
IF_DEF(RWR_RRD_ARD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD ) // write reg , read reg2, read [adr]
177180

0 commit comments

Comments
 (0)