Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit adadee1

Browse files
committed
Adding support for the SSE And, AndNot, Divide, Max, Min, MoveHighToLow, MoveLowToHigh, Multiply, Or, Subtract, UnpackHigh, UnpackLow, and Xor instructions.
1 parent bd56bb3 commit adadee1

File tree

4 files changed: +124 additions, -5 deletions

src/jit/emitxarch.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
105105
case INS_minps:
106106
case INS_minsd:
107107
case INS_minss:
108+
case INS_movhlps:
108109
case INS_movlhps:
109110
case INS_mulpd:
110111
case INS_mulps:
@@ -166,6 +167,8 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
166167
case INS_subps:
167168
case INS_subsd:
168169
case INS_subss:
170+
case INS_unpckhps:
171+
case INS_unpcklps:
169172
case INS_vinsertf128:
170173
case INS_vinserti128:
171174
case INS_vperm2i128:

src/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 103 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,6 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
9797
assert(targetReg != REG_NA);
9898
assert(op1Reg != REG_NA);
9999

100-
genConsumeOperands(node);
101-
102100
if (op2->isContained() || op2->isUsedFromSpillTemp())
103101
{
104102
TempDsc* tmpDsc = nullptr;
@@ -188,15 +186,24 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
188186
{
189187
emit->emitIns_SIMD_R_R_R(ins, targetReg, op1Reg, op2->gtRegNum, targetType);
190188
}
191-
192-
genProduceReg(node);
193189
}
194190

195191
void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
196192
{
197193
NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
194+
GenTree* op1 = node->gtGetOp1();
195+
GenTree* op2 = node->gtGetOp2();
196+
regNumber targetReg = node->gtRegNum;
197+
var_types targetType = node->TypeGet();
198+
var_types baseType = node->gtSIMDBaseType;
198199
instruction ins = INS_invalid;
199200

201+
regNumber op1Reg = op1->gtRegNum;
202+
regNumber op2Reg = REG_NA;
203+
emitter* emit = getEmitter();
204+
205+
genConsumeOperands(node);
206+
200207
switch (intrinsicID)
201208
{
202209
case NI_SSE_Add:
@@ -207,10 +214,90 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
207214
break;
208215
}
209216

217+
case NI_SSE_And:
218+
assert(baseType == TYP_FLOAT);
219+
op2Reg = op2->gtRegNum;
220+
emit->emitIns_SIMD_R_R_R(INS_andps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
221+
break;
222+
223+
case NI_SSE_AndNot:
224+
assert(baseType == TYP_FLOAT);
225+
op2Reg = op2->gtRegNum;
226+
emit->emitIns_SIMD_R_R_R(INS_andnps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
227+
break;
228+
229+
case NI_SSE_Divide:
230+
assert(baseType == TYP_FLOAT);
231+
op2Reg = op2->gtRegNum;
232+
emit->emitIns_SIMD_R_R_R(INS_divps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
233+
break;
234+
235+
case NI_SSE_Max:
236+
assert(baseType == TYP_FLOAT);
237+
op2Reg = op2->gtRegNum;
238+
emit->emitIns_SIMD_R_R_R(INS_maxps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
239+
break;
240+
241+
case NI_SSE_Min:
242+
assert(baseType == TYP_FLOAT);
243+
op2Reg = op2->gtRegNum;
244+
emit->emitIns_SIMD_R_R_R(INS_minps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
245+
break;
246+
247+
case NI_SSE_MoveHighToLow:
248+
assert(baseType == TYP_FLOAT);
249+
op2Reg = op2->gtRegNum;
250+
emit->emitIns_SIMD_R_R_R(INS_movhlps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
251+
break;
252+
253+
case NI_SSE_MoveLowToHigh:
254+
assert(baseType == TYP_FLOAT);
255+
op2Reg = op2->gtRegNum;
256+
emit->emitIns_SIMD_R_R_R(INS_movlhps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
257+
break;
258+
259+
case NI_SSE_Multiply:
260+
assert(baseType == TYP_FLOAT);
261+
op2Reg = op2->gtRegNum;
262+
emit->emitIns_SIMD_R_R_R(INS_mulps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
263+
break;
264+
265+
case NI_SSE_Or:
266+
assert(baseType == TYP_FLOAT);
267+
op2Reg = op2->gtRegNum;
268+
emit->emitIns_SIMD_R_R_R(INS_orps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
269+
break;
270+
271+
case NI_SSE_Subtract:
272+
assert(baseType == TYP_FLOAT);
273+
op2Reg = op2->gtRegNum;
274+
emit->emitIns_SIMD_R_R_R(INS_subps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
275+
break;
276+
277+
case NI_SSE_UnpackHigh:
278+
assert(baseType == TYP_FLOAT);
279+
op2Reg = op2->gtRegNum;
280+
emit->emitIns_SIMD_R_R_R(INS_unpckhps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
281+
break;
282+
283+
case NI_SSE_UnpackLow:
284+
assert(baseType == TYP_FLOAT);
285+
op2Reg = op2->gtRegNum;
286+
emit->emitIns_SIMD_R_R_R(INS_unpcklps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
287+
break;
288+
289+
case NI_SSE_Xor:
290+
assert(baseType == TYP_FLOAT);
291+
op2Reg = op2->gtRegNum;
292+
emit->emitIns_SIMD_R_R_R(INS_xorps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
293+
break;
294+
210295
default:
211296
unreached();
212297
break;
213298
}
299+
300+
genProduceReg(node);
214301
}
215302

216303
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
@@ -219,6 +306,8 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
219306
var_types baseType = node->gtSIMDBaseType;
220307
instruction ins = INS_invalid;
221308

309+
genConsumeOperands(node);
310+
222311
switch (intrinsicID)
223312
{
224313
case NI_SSE2_Add:
@@ -259,6 +348,8 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
259348
unreached();
260349
break;
261350
}
351+
352+
genProduceReg(node);
262353
}
263354

264355
void CodeGen::genSSE3Intrinsic(GenTreeHWIntrinsic* node)
@@ -324,6 +415,8 @@ void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
324415
var_types baseType = node->gtSIMDBaseType;
325416
instruction ins = INS_invalid;
326417

418+
genConsumeOperands(node);
419+
327420
switch (intrinsicID)
328421
{
329422
case NI_AVX_Add:
@@ -351,6 +444,8 @@ void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
351444
unreached();
352445
break;
353446
}
447+
448+
genProduceReg(node);
354449
}
355450

356451
void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
@@ -359,6 +454,8 @@ void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
359454
var_types baseType = node->gtSIMDBaseType;
360455
instruction ins = INS_invalid;
361456

457+
genConsumeOperands(node);
458+
362459
switch (intrinsicID)
363460
{
364461
case NI_AVX2_Add:
@@ -396,6 +493,8 @@ void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
396493
unreached();
397494
break;
398495
}
496+
497+
genProduceReg(node);
399498
}
400499

401500
void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node)

src/jit/hwintrinsicxarch.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,10 +453,24 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
453453
switch (intrinsic)
454454
{
455455
case NI_SSE_Add:
456+
case NI_SSE_And:
457+
case NI_SSE_AndNot:
458+
case NI_SSE_Divide:
459+
case NI_SSE_Max:
460+
case NI_SSE_Min:
461+
case NI_SSE_MoveHighToLow:
462+
case NI_SSE_MoveLowToHigh:
463+
case NI_SSE_Multiply:
464+
case NI_SSE_Or:
465+
case NI_SSE_Subtract:
466+
case NI_SSE_UnpackHigh:
467+
case NI_SSE_UnpackLow:
468+
case NI_SSE_Xor:
456469
assert(sig->numArgs == 2);
470+
assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
457471
op2 = impSIMDPopStack(TYP_SIMD16);
458472
op1 = impSIMDPopStack(TYP_SIMD16);
459-
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE_Add, TYP_FLOAT, 16);
473+
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
460474
break;
461475

462476
default:

src/jit/instrsxarch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,10 @@ INST3( movapd, "movapd" , 0, IUM_WR, 0, 0, PCKDBL(0x29), BAD_CODE, PCK
204204
INST3( movaps, "movaps" , 0, IUM_WR, 0, 0, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28))
205205
INST3( movupd, "movupd" , 0, IUM_WR, 0, 0, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10))
206206
INST3( movups, "movups" , 0, IUM_WR, 0, 0, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10))
207+
INST3( movhlps, "movhlps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x12))
207208
INST3( movlhps, "movlhps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x16))
209+
INST3( unpckhps, "unpckhps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x15))
210+
INST3( unpcklps, "unpcklps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x14))
208211

209212
INST3( shufps, "shufps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC6))
210213
INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC6))

0 commit comments

Comments
 (0)