@@ -7331,6 +7331,173 @@ void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
73317331 inst_RV_RV (ins, targetReg, operandReg, targetType);
73327332}
73337333
7334+ // -----------------------------------------------------------------------------------------
7335+ // genSSE41RoundOp - emit one roundss/roundsd instruction for a Round, Ceiling, or Floor intrinsic node
7336+ //
7337+ // Arguments:
7338+ // treeNode - the GT_INTRINSIC node; its first operand (gtGetOp1) is the value to round and its gtRegNum is the destination register
7339+ //
7340+ // Return value:
7341+ // None
7342+ //
7343+ // Assumptions:
7344+ // i) SSE4.1 is supported by the underlying hardware (asserted via compSupports(InstructionSet_SSE41))
7345+ // ii) treeNode oper is a GT_INTRINSIC
7346+ // iii) the operand is a floating point type and has the same type as treeNode
7347+ // iv) treeNode itself is not used from memory (its operand may be contained or read from a spill temp)
7348+ // v) treeNode's gtIntrinsicId is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor
7349+ // vi) this routine does not call genProduceReg(); the caller needs to call it
7350+ void CodeGen::genSSE41RoundOp (GenTreeOp* treeNode)
7351+ {
7352+ // i) SSE4.1 is supported by the underlying hardware
7353+ assert (compiler->compSupports (InstructionSet_SSE41));
7354+
7355+ // ii) treeNode oper is a GT_INTRINSIC
7356+ assert (treeNode->OperGet () == GT_INTRINSIC);
7357+
7358+ GenTree* srcNode = treeNode->gtGetOp1 (); // the value being rounded
7359+
7360+ // iii) the operand is a floating point type, and its type matches the node's type
7361+ assert (varTypeIsFloating (srcNode));
7362+ assert (srcNode->TypeGet () == treeNode->TypeGet ());
7363+
7364+ // iv) treeNode is not used from memory
7365+ assert (!treeNode->isUsedFromMemory ());
7366+
7367+ genConsumeOperands (treeNode);
7368+
7369+ instruction ins = (treeNode->TypeGet () == TYP_FLOAT) ? INS_roundss : INS_roundsd; // roundss for TYP_FLOAT, roundsd otherwise
7370+ emitAttr size = emitTypeSize (treeNode);
7371+
7372+ regNumber dstReg = treeNode->gtRegNum ; // destination register for every emitted form below
7373+
7374+ unsigned ival = 0 ; // immediate operand handed to the round instruction; chosen per intrinsic id below
7375+
7376+ // v) the intrinsic id is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor; it selects the immediate
7377+ switch (treeNode->gtIntrinsic .gtIntrinsicId )
7378+ {
7379+ case CORINFO_INTRINSIC_Round:
7380+ ival = 4 ; // 0b0100 -- NOTE(review): presumably "round using the current MXCSR rounding mode"; confirm against the ROUNDSS/ROUNDSD imm8 encoding
7381+ break ;
7382+
7383+ case CORINFO_INTRINSIC_Ceiling:
7384+ ival = 10 ; // 0b1010 -- NOTE(review): presumably "round toward +infinity, precision exception suppressed"; confirm
7385+ break ;
7386+
7387+ case CORINFO_INTRINSIC_Floor:
7388+ ival = 9 ; // 0b1001 -- NOTE(review): presumably "round toward -infinity, precision exception suppressed"; confirm
7389+ break ;
7390+
7391+ default : // any other intrinsic id violates assumption v)
7392+ ins = INS_invalid;
7393+ assert (!" genSSE41RoundOp: unsupported intrinsic" );
7394+ unreached ();
7395+ }
7396+
7397+ if (srcNode->isContained () || srcNode->isUsedFromSpillTemp ()) // operand is contained or lives in a spill temp: use one of the emitIns_R_{S,C,A}_I forms
7398+ {
7399+ emitter* emit = getEmitter ();
7400+
7401+ TempDsc* tmpDsc = nullptr ; // stays null unless the operand comes from a spill temp
7402+ unsigned varNum = BAD_VAR_NUM; // sentinel; checked by the assert before emitIns_R_S_I
7403+ unsigned offset = (unsigned )-1 ; // sentinel; checked by the assert before emitIns_R_S_I
7404+
7405+ if (srcNode->isUsedFromSpillTemp ())
7406+ {
7407+ assert (srcNode->IsRegOptional ());
7408+
7409+ tmpDsc = getSpillTempDsc (srcNode);
7410+ varNum = tmpDsc->tdTempNum ();
7411+ offset = 0 ;
7412+
7413+ compiler->tmpRlsTemp (tmpDsc); // the temp is released here, before the instruction that reads it is emitted below
7414+ }
7415+ else if (srcNode->isIndir ()) // contained indirection: dispatch on the oper of its address node
7416+ {
7417+ GenTreeIndir* memIndir = srcNode->AsIndir ();
7418+ GenTree* memBase = memIndir->gtOp1 ;
7419+
7420+ switch (memBase->OperGet ())
7421+ {
7422+ case GT_LCL_VAR_ADDR: // address of a local: record its number and continue to emitIns_R_S_I below
7423+ {
7424+ varNum = memBase->AsLclVarCommon ()->GetLclNum ();
7425+ offset = 0 ;
7426+
7427+ // Ensure that all the GenTreeIndir values are set to their defaults.
7428+ assert (memBase->gtRegNum == REG_NA);
7429+ assert (!memIndir->HasIndex ());
7430+ assert (memIndir->Scale () == 1 );
7431+ assert (memIndir->Offset () == 0 );
7432+
7433+ break ;
7434+ }
7435+
7436+ case GT_CLS_VAR_ADDR: // class variable address: emit via its field handle and return without reaching the varNum/offset asserts
7437+ {
7438+ emit->emitIns_R_C_I (ins, size, dstReg, memBase->gtClsVar .gtClsVarHnd , 0 , ival);
7439+ return ;
7440+ }
7441+
7442+ default : // any other address node: hand the whole indirection to emitIns_R_A_I and return
7443+ {
7444+ emit->emitIns_R_A_I (ins, size, dstReg, memIndir, ival);
7445+ return ;
7446+ }
7447+ }
7448+ }
7449+ else // contained operand that is not an indirection: constant, local field, or local variable
7450+ {
7451+ switch (srcNode->OperGet ())
7452+ {
7453+ case GT_CNS_DBL:
7454+ {
7455+ GenTreeDblCon* dblConst = srcNode->AsDblCon ();
7456+ CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst (dblConst->gtDconVal , emitTypeSize (dblConst)); // field handle obtained from the emitter for the constant value
7457+
7458+ emit->emitIns_R_C_I (ins, size, dstReg, hnd, 0 , ival);
7459+ return ; // emitted directly; the varNum/offset asserts below are not reached
7460+ }
7461+
7462+ case GT_LCL_FLD:
7463+ {
7464+ GenTreeLclFld* lclField = srcNode->AsLclFld ();
7465+
7466+ varNum = lclField->GetLclNum ();
7467+ offset = lclField->gtLclFld .gtLclOffs ; // the field's own offset is used, unlike the other cases which use 0
7468+ break ;
7469+ }
7470+
7471+ case GT_LCL_VAR:
7472+ {
7473+ assert (srcNode->IsRegOptional () ||
7474+ !compiler->lvaTable [srcNode->gtLclVar .gtLclNum ].lvIsRegCandidate ()); // a local reaching this path must be reg-optional or not a register candidate
7475+
7476+ varNum = srcNode->AsLclVar ()->GetLclNum ();
7477+ offset = 0 ;
7478+ break ;
7479+ }
7480+
7481+ default : // no other contained operand kinds are expected here
7482+ unreached ();
7483+ break ;
7484+ }
7485+ }
7486+
7487+ // Ensure we got a good varNum and offset.
7488+ // We also need to check for `tmpDsc != nullptr` since spill temp numbers
7489+ // are negative and start with -1, which also happens to be BAD_VAR_NUM.
7490+ assert ((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr ));
7491+ assert (offset != (unsigned )-1 );
7492+
7493+ emit->emitIns_R_S_I (ins, size, dstReg, varNum, offset, ival); // shared exit for the spill-temp, GT_LCL_VAR_ADDR, GT_LCL_FLD and GT_LCL_VAR cases
7494+ }
7495+ else // operand is neither contained nor in a spill temp: read it from srcNode->gtRegNum
7496+ {
7497+ inst_RV_RV_IV (ins, size, dstReg, srcNode->gtRegNum , ival);
7498+ }
7499+ }
7500+
73347501// ---------------------------------------------------------------------
73357502// genIntrinsic - generate code for a given intrinsic
73367503//
@@ -7361,6 +7528,12 @@ void CodeGen::genIntrinsic(GenTreePtr treeNode)
73617528 genSSE2BitwiseOp (treeNode);
73627529 break ;
73637530
7531+ case CORINFO_INTRINSIC_Round:
7532+ case CORINFO_INTRINSIC_Ceiling:
7533+ case CORINFO_INTRINSIC_Floor:
7534+ genSSE41RoundOp (treeNode->AsOp ());
7535+ break ;
7536+
73647537 default :
73657538 assert (!" genIntrinsic: Unsupported intrinsic" );
73667539 unreached ();
0 commit comments