Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 91c7550

Browse files
committed
Adding support for the SSE Reciprocal, ReciprocalSqrt, and Sqrt intrinsics
1 parent 77fa074 commit 91c7550

File tree

5 files changed

+43
-0
lines changed

5 files changed

+43
-0
lines changed

src/jit/emitxarch.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,12 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
196196
case INS_movlpd:
197197
case INS_movlps:
198198
case INS_movss:
199+
case INS_rcpps:
200+
case INS_rcpss:
199201
case INS_roundsd:
200202
case INS_roundss:
203+
case INS_rsqrtps:
204+
case INS_rsqrtss:
201205
case INS_sqrtsd:
202206
case INS_sqrtss:
203207
return IsAVXInstruction(ins);
@@ -5077,6 +5081,11 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
50775081
}
50785082

50795083
#if FEATURE_HW_INTRINSICS
5084+
//------------------------------------------------------------------------
// emitIns_SIMD_R_R: emit a SIMD instruction that takes two register operands.
//
// Arguments:
//    ins      -- the instruction to emit
//    reg      -- first register operand (callers in this change pass the target register)
//    reg1     -- second register operand (callers in this change pass the source register)
//    simdtype -- the type whose emitTypeSize() supplies the operand size attribute
//
// Notes:
//    This is a thin forwarder to emitIns_R_R; it performs no encoding
//    selection of its own.
//
void emitter::emitIns_SIMD_R_R(instruction ins, regNumber reg, regNumber reg1, var_types simdtype)
{
    // Translate the SIMD type into the size attribute expected by emitIns_R_R.
    emitAttr opSize = emitTypeSize(simdtype);

    emitIns_R_R(ins, opSize, reg, reg1);
}
5088+
50805089
void emitter::emitIns_SIMD_R_R_A(
50815090
instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype)
50825091
{

src/jit/emitxarch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,7 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg,
441441
void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
442442

443443
#if FEATURE_HW_INTRINSICS
444+
// Emits 'ins' with the two register operands 'reg' and 'reg1'; the operand size is derived from 'simdtype'.
void emitIns_SIMD_R_R(instruction ins, regNumber reg, regNumber reg1, var_types simdtype);
444445
void emitIns_SIMD_R_R_A(instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype);
445446
void emitIns_SIMD_R_R_C(
446447
instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype);

src/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,24 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
320320
emit->emitIns_SIMD_R_R_R(INS_orps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
321321
break;
322322

323+
case NI_SSE_Reciprocal:
324+
assert(baseType == TYP_FLOAT);
325+
assert(op2 == nullptr);
326+
emit->emitIns_SIMD_R_R(INS_rcpps, targetReg, op1Reg, TYP_SIMD16);
327+
break;
328+
329+
case NI_SSE_ReciprocalSqrt:
330+
assert(baseType == TYP_FLOAT);
331+
assert(op2 == nullptr);
332+
emit->emitIns_SIMD_R_R(INS_rsqrtps, targetReg, op1Reg, TYP_SIMD16);
333+
break;
334+
335+
case NI_SSE_Sqrt:
336+
assert(baseType == TYP_FLOAT);
337+
assert(op2 == nullptr);
338+
emit->emitIns_SIMD_R_R(INS_sqrtps, targetReg, op1Reg, TYP_SIMD16);
339+
break;
340+
323341
case NI_SSE_Subtract:
324342
assert(baseType == TYP_FLOAT);
325343
op2Reg = op2->gtRegNum;

src/jit/hwintrinsicxarch.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,15 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
485485
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
486486
break;
487487

488+
case NI_SSE_Reciprocal:
489+
case NI_SSE_ReciprocalSqrt:
490+
case NI_SSE_Sqrt:
491+
assert(sig->numArgs == 1);
492+
assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
493+
op1 = impSIMDPopStack(TYP_SIMD16);
494+
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16);
495+
break;
496+
488497
default:
489498
JITDUMP("Not implemented hardware intrinsic");
490499
break;

src/jit/instrsxarch.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,12 @@ INST3( orps, "orps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x56)) /
252252
INST3( orpd, "orpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x56)) // Or packed doubles
253253
INST3( haddpd, "haddpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7C)) // Horizontal add packed doubles
254254

255+
// SSE approximate arithmetic (reciprocal and reciprocal square root)
256+
INST3( rcpps, "rcpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x53)) // Reciprocal of packed singles
257+
INST3( rcpss, "rcpss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x53)) // Reciprocal of scalar single
258+
INST3( rsqrtps, "rsqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x52)) // Reciprocal Sqrt of packed singles
259+
INST3( rsqrtss, "rsqrtss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x52)) // Reciprocal Sqrt of scalar single
260+
255261
// SSE2 conversions
256262
INST3( cvtpi2ps, "cvtpi2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2A)) // cvt packed DWORDs to singles
257263
INST3( cvtsi2ss, "cvtsi2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2A)) // cvt DWORD to scalar single

0 commit comments

Comments
 (0)