Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 94f82fb

Browse files
committed
Adding support for the SSE Shuffle intrinsic
1 parent fbc91fc commit 94f82fb

File tree

6 files changed

+166
-1
lines changed

6 files changed

+166
-1
lines changed

src/jit/compiler.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2065,6 +2065,13 @@ class Compiler
20652065
var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size);
20662066
GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(
20672067
var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size);
2068+
GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type,
2069+
GenTree* op1,
2070+
GenTree* op2,
2071+
GenTree* op3,
2072+
NamedIntrinsic hwIntrinsicID,
2073+
var_types baseType,
2074+
unsigned size);
20682075
GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type,
20692076
GenTree* op1,
20702077
GenTree* op2,

src/jit/gentree.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17921,6 +17921,18 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(
1792117921
return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, baseType, size);
1792217922
}
1792317923

17924+
// Creates a GT_HWIntrinsic node for a hardware intrinsic that takes three operands.
//
// Unlike the one- and two-operand overloads, the three operands are not handed to the
// GenTreeHWIntrinsic constructor individually: they are first wrapped into a single
// argument list via gtNewArgList(op1, op2, op3), and that list is passed as the node's
// only operand. Code that later processes the node (e.g. the NI_SSE_Shuffle handling
// in codegen, lowering and LSRA) therefore walks the list with Current()/Rest() to
// reach each operand.
//
// Arguments:
//    type          - forwarded to the node constructor (callers in this change pass TYP_SIMD16)
//    op1, op2, op3 - the three operands, stored in that order in the argument list
//    hwIntrinsicID - the intrinsic this node represents (e.g. NI_SSE_Shuffle)
//    baseType      - forwarded to the node constructor (callers in this change pass TYP_FLOAT)
//    size          - forwarded to the node constructor (callers in this change pass 16;
//                    presumably the SIMD size in bytes - TODO confirm)
//
// Return Value:
//    The newly allocated GenTreeHWIntrinsic node.
//
GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
17925+
GenTree* op1,
17926+
GenTree* op2,
17927+
GenTree* op3,
17928+
NamedIntrinsic hwIntrinsicID,
17929+
var_types baseType,
17930+
unsigned size)
17931+
{
17932+
return new (this, GT_HWIntrinsic)
17933+
GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, baseType, size);
17934+
}
17935+
1792417936
GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
1792517937
GenTree* op1,
1792617938
GenTree* op2,

src/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,95 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
353353
emit->emitIns_SIMD_R_R_R(INS_xorps, targetReg, targetReg, targetReg, TYP_SIMD16);
354354
break;
355355

356+
case NI_SSE_Shuffle:
357+
{
358+
GenTreeArgList* argList;
359+
360+
// Shuffle takes 3 operands, so op1 should be an arg list with two
361+
// additional nodes in the chain.
362+
assert(baseType == TYP_FLOAT);
363+
assert(op1->OperIsList());
364+
assert(op1->AsArgList()->Rest() != nullptr);
365+
assert(op1->AsArgList()->Rest()->Rest() != nullptr);
366+
assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
367+
assert(op2 == nullptr);
368+
369+
argList = op1->AsArgList();
370+
op1 = argList->Current();
371+
op1Reg = op1->gtRegNum;
372+
genConsumeRegs(op1);
373+
374+
argList = argList->Rest();
375+
op2 = argList->Current();
376+
op2Reg = op2->gtRegNum;
377+
genConsumeRegs(op2);
378+
379+
argList = argList->Rest();
380+
op3 = argList->Current();
381+
genConsumeRegs(op3);
382+
383+
if (op3->IsCnsIntOrI())
384+
{
385+
ssize_t ival = op3->AsIntConCommon()->IconValue();
386+
emit->emitIns_SIMD_R_R_R_I(INS_shufps, targetReg, op1Reg, op2Reg, (int)ival, TYP_SIMD16);
387+
}
388+
else
389+
{
390+
// We emit a fallback case for the scenario when op3 is not a constant. This should normally
391+
// happen when the intrinsic is called indirectly, such as via Reflection. However, it can
392+
// also occur if the consumer calls it directly and just doesn't pass a constant value.
393+
394+
const unsigned jmpCount = 256;
395+
BasicBlock* jmpTable[jmpCount];
396+
397+
unsigned jmpTableBase = emit->emitBBTableDataGenBeg(jmpCount, true);
398+
unsigned jmpTableOffs = 0;
399+
400+
// Emit the jump table
401+
402+
JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTableBase);
403+
404+
for (unsigned i = 0; i < jmpCount; i++)
405+
{
406+
jmpTable[i] = genCreateTempLabel();
407+
JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, jmpTable[i]->bbNum);
408+
emit->emitDataGenData(i, jmpTable[i]);
409+
}
410+
411+
emit->emitDataGenEnd();
412+
413+
// Compute and jump to the appropriate offset in the switch table
414+
415+
regNumber baseReg = node->ExtractTempReg(); // the start of the switch table
416+
regNumber offsReg = node->GetSingleTempReg(); // the offset into the switch table
417+
418+
emit->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), offsReg, compiler->eeFindJitDataOffs(jmpTableBase),
419+
0);
420+
421+
emit->emitIns_R_ARX(INS_mov, EA_4BYTE, offsReg, offsReg, op3->gtRegNum, 4, 0);
422+
emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, baseReg);
423+
emit->emitIns_R_R(INS_add, EA_PTRSIZE, offsReg, baseReg);
424+
emit->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), offsReg);
425+
426+
// Emit the switch table entries
427+
428+
BasicBlock* switchTableBeg = genCreateTempLabel();
429+
BasicBlock* switchTableEnd = genCreateTempLabel();
430+
431+
genDefineTempLabel(switchTableBeg);
432+
433+
for (unsigned i = 0; i < jmpCount; i++)
434+
{
435+
genDefineTempLabel(jmpTable[i]);
436+
emit->emitIns_SIMD_R_R_R_I(INS_shufps, targetReg, op1Reg, op2Reg, i, TYP_SIMD16);
437+
emit->emitIns_J(INS_jmp, switchTableEnd);
438+
}
439+
440+
genDefineTempLabel(switchTableEnd);
441+
}
442+
break;
443+
}
444+
356445
case NI_SSE_Sqrt:
357446
assert(baseType == TYP_FLOAT);
358447
assert(op2 == nullptr);

src/jit/hwintrinsicxarch.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,11 +468,35 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
468468
GenTree* left = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE_UnpackLow, TYP_FLOAT, 16);
469469
GenTree* right = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_UnpackLow, TYP_FLOAT, 16);
470470
GenTree* control = gtNewIconNode(68, TYP_UBYTE);
471-
471+
472472
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, left, right, control, NI_SSE_Shuffle, TYP_FLOAT, 16);
473473
break;
474474
}
475475

476+
case NI_SSE_Shuffle:
477+
assert(sig->numArgs == 3);
478+
assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
479+
480+
op3 = impStackTop().val;
481+
482+
if (op3->IsCnsIntOrI() || mustExpand)
483+
{
484+
impPopStack(); // Pop the value we peeked at
485+
op2 = impSIMDPopStack(TYP_SIMD16);
486+
op1 = impSIMDPopStack(TYP_SIMD16);
487+
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, intrinsic, TYP_FLOAT, 16);
488+
}
489+
else
490+
{
491+
// When op3 is not a constant and we are not being forced to expand, we need to
492+
// return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
493+
// intrinsic method is recursive and will be forced to expand, at which point
494+
// we emit some less efficient fallback code.
495+
496+
return nullptr;
497+
}
498+
break;
499+
476500
case NI_SSE_Add:
477501
case NI_SSE_And:
478502
case NI_SSE_AndNot:

src/jit/lowerxarch.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2337,6 +2337,18 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
23372337
break;
23382338
}
23392339

2340+
case NI_SSE_Shuffle:
2341+
{
2342+
assert(op1->OperIsList());
2343+
GenTree* op3 = op1->AsArgList()->Rest()->Rest()->Current();
2344+
2345+
if (op3->IsCnsIntOrI())
2346+
{
2347+
MakeSrcContained(node, op3);
2348+
}
2349+
break;
2350+
}
2351+
23402352
default:
23412353
assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END));
23422354
break;

src/jit/lsraxarch.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2534,13 +2534,34 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree,
25342534
info->srcCount += GetOperandInfo(op1);
25352535
}
25362536
}
2537+
25372538
if (op2 != nullptr)
25382539
{
25392540
info->srcCount += GetOperandInfo(op2);
25402541
}
25412542

25422543
switch (intrinsicID)
25432544
{
2545+
case NI_SSE_Shuffle:
2546+
{
2547+
assert(op1->OperIsList());
2548+
GenTree* op3 = op1->AsArgList()->Rest()->Rest()->Current();
2549+
2550+
if (!op3->isContainedIntOrIImmed())
2551+
{
2552+
assert(!op3->IsCnsIntOrI());
2553+
2554+
// We need two extra registers when op3 isn't a constant so
2555+
// the offset into the jump table for the fallback path
2556+
// can be computed.
2557+
2558+
info->internalIntCount = 2;
2559+
info->setInternalCandidates(this, allRegs(TYP_INT));
2560+
break;
2561+
}
2562+
break;
2563+
}
2564+
25442565
#ifdef _TARGET_X86_
25452566
case NI_SSE42_Crc32:
25462567
{

0 commit comments

Comments
 (0)