Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 8225ec3

Browse files
committed
Adding basic containment support to the x86 HWIntrinsics
1 parent 140ba02 commit 8225ec3

File tree

6 files changed

+388
-78
lines changed

6 files changed

+388
-78
lines changed

src/jit/codegenlinear.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ void genPutArgStkSIMD12(GenTree* treeNode);
116116

117117
#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
118118
void genHWIntrinsic(GenTreeHWIntrinsic* node);
119+
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
119120
void genSSEIntrinsic(GenTreeHWIntrinsic* node);
120121
void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
121122
void genSSE3Intrinsic(GenTreeHWIntrinsic* node);

src/jit/emitfmtsxarch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ IF_DEF(RRD_MRD, IS_GM_RD|IS_R1_RD, DSP) // read reg , read [
123123
IF_DEF(RWR_MRD, IS_GM_RD|IS_R1_WR, DSP) // write reg , read [mem]
124124
IF_DEF(RRW_MRD, IS_GM_RD|IS_R1_RW, DSP) // r/w reg , read [mem]
125125

126+
IF_DEF(RWR_RRD_MRD, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP) // write reg , read reg2 , read [mem]
126127
IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR, DSP) // write reg , offset mem
127128

128129
IF_DEF(MRD_RRD, IS_GM_RD|IS_R1_RD, DSP) // read [mem], read reg
@@ -147,6 +148,8 @@ IF_DEF(RRD_SRD, IS_SF_RD|IS_R1_RD, NONE) // read reg , read [
147148
IF_DEF(RWR_SRD, IS_SF_RD|IS_R1_WR, NONE) // write reg , read [stk]
148149
IF_DEF(RRW_SRD, IS_SF_RD|IS_R1_RW, NONE) // r/w reg , read [stk]
149150

151+
IF_DEF(RWR_RRD_SRD, IS_SF_RD|IS_R1_WR|IS_R2_RD, NONE) // write reg , read reg2, read [stk]
152+
150153
IF_DEF(SRD_RRD, IS_SF_RD|IS_R1_RD, NONE) // read [stk], read reg
151154
IF_DEF(SWR_RRD, IS_SF_WR|IS_R1_RD, NONE) // write [stk], read reg
152155
IF_DEF(SRW_RRD, IS_SF_RW|IS_R1_RD, NONE) // r/w [stk], read reg
@@ -170,6 +173,8 @@ IF_DEF(RRD_ARD, IS_AM_RD|IS_R1_RD, AMD ) // read reg , read [
170173
IF_DEF(RWR_ARD, IS_AM_RD|IS_R1_WR, AMD ) // write reg , read [adr]
171174
IF_DEF(RRW_ARD, IS_AM_RD|IS_R1_RW, AMD ) // r/w reg , read [adr]
172175

176+
IF_DEF(RWR_RRD_ARD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD ) // write reg , read reg2, read [adr]
177+
173178
IF_DEF(ARD_RRD, IS_AM_RD|IS_R1_RD, AMD ) // read [adr], read reg
174179
IF_DEF(AWR_RRD, IS_AM_WR|IS_R1_RD, AMD ) // write [adr], read reg
175180
IF_DEF(ARW_RRD, IS_AM_RW|IS_R1_RD, AMD ) // r/w [adr], read reg

src/jit/emitxarch.cpp

Lines changed: 206 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1998,8 +1998,8 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
19981998
// BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
19991999
assert(ins != INS_bt);
20002000

2001-
assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
2002-
|| (attrSize == EA_16BYTE) // only for x64
2001+
assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
2002+
|| (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64
20032003
|| (ins == INS_movzx) || (ins == INS_movsx));
20042004
size = 3;
20052005
}
@@ -2588,6 +2588,8 @@ emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
25882588
return IF_RWR_MRD;
25892589
case IF_RRW_ARD:
25902590
return IF_RRW_MRD;
2591+
case IF_RWR_RRD_ARD:
2592+
return IF_RWR_RRD_MRD;
25912593

25922594
case IF_ARD_RRD:
25932595
return IF_MRD_RRD;
@@ -3889,6 +3891,82 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN
38893891
emitCurIGsize += sz;
38903892
}
38913893

3894+
//------------------------------------------------------------------------
// emitIns_R_A: Add an instruction with a register operand and an
//              address-mode memory operand.
//
// Arguments:
//    ins     -- The instruction being emitted
//    attr    -- The emit attribute passed to the descriptor allocation
//    reg1    -- The register operand
//    baseReg -- The base register of the address mode
//    indxReg -- The index register of the address mode
//    scale   -- The address-mode scale (stored via emitEncodeScale)
//    offs    -- The address-mode displacement
//
// Notes:
//    The instruction is always recorded with the IF_RRW_ARD format,
//    i.e. reg1 is treated as both read and written.
//
void emitter::emitIns_R_A(
    instruction ins, emitAttr attr, regNumber reg1, regNumber baseReg, regNumber indxReg, size_t scale, ssize_t offs)
{
    instrDesc* id = emitNewInstrAmd(attr, offs);

    // Operation, format and register operand.
    id->idIns(ins);
    id->idInsFmt(IF_RRW_ARD);
    id->idReg1(reg1);

    // Memory operand: [baseReg + indxReg * scale + offs].
    id->idAddr()->iiaAddrMode.amBaseReg = baseReg;
    id->idAddr()->iiaAddrMode.amIndxReg = indxReg;
    id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(scale);

    // The displacement handed to emitNewInstrAmd must round-trip.
    assert(emitGetInsAmdAny(id) == offs);

    // Size the instruction from the populated descriptor and record it.
    const UNATIVE_OFFSET codeSize = emitInsSizeAM(id, insCodeRM(ins));
    id->idCodeSize(codeSize);

    dispIns(id);
    emitCurIGsize += codeSize;
}
3913+
3914+
//------------------------------------------------------------------------
// emitIns_R_R_A: Add a three-operand SSE/AVX instruction whose operands are
//                a destination register, a source register and an
//                address-mode memory operand.
//
// Arguments:
//    ins     -- The instruction being emitted; must be a three-operand AVX instruction
//    attr    -- The emit attribute passed to the descriptor allocation
//    reg1    -- The destination register (written)
//    reg2    -- The first source register (read)
//    baseReg -- The base register of the address mode
//    indxReg -- The index register of the address mode
//    scale   -- The address-mode scale (stored via emitEncodeScale)
//    offs    -- The address-mode displacement
//
// Notes:
//    The instruction is recorded with the IF_RWR_RRD_ARD format.
//
void emitter::emitIns_R_R_A(instruction ins,
                            emitAttr    attr,
                            regNumber   reg1,
                            regNumber   reg2,
                            regNumber   baseReg,
                            regNumber   indxReg,
                            size_t      scale,
                            ssize_t     offs)
{
    assert(IsSSEOrAVXInstruction(ins));
    assert(IsThreeOperandAVXInstruction(ins));

    instrDesc* id = emitNewInstrAmd(attr, offs);

    id->idIns(ins);
    id->idInsFmt(IF_RWR_RRD_ARD);
    id->idReg1(reg1);
    id->idReg2(reg2);

    id->idAddr()->iiaAddrMode.amBaseReg = baseReg;
    id->idAddr()->iiaAddrMode.amIndxReg = indxReg;
    id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(scale);

    assert(emitGetInsAmdAny(id) == offs);

    // emitInsSizeAM takes the descriptor as input, so it must only be called once the
    // instruction, registers and address mode have been stored in it (as emitIns_R_A does).
    // Sizing a freshly allocated descriptor would estimate from unset fields.
    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
    id->idCodeSize(sz);

    dispIns(id);
    emitCurIGsize += sz;
}
3943+
3944+
//------------------------------------------------------------------------
// emitIns_R_R_C: Add a three-operand SSE/AVX instruction whose operands are
//                a destination register, a source register and a static
//                field (class variable) memory operand.
//
// Arguments:
//    ins    -- The instruction being emitted; must be a three-operand AVX instruction
//    attr   -- The emit attribute; EA_DSP_RELOC_FLG is added when the field is not a global address
//    reg1   -- The destination register (written)
//    reg2   -- The first source register (read)
//    fldHnd -- The handle of the field that is read
//    offs   -- The offset passed to the descriptor allocation
//
// Notes:
//    The instruction is recorded with the IF_RWR_RRD_MRD format.
//
void emitter::emitIns_R_R_C(
    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
{
    assert(IsSSEOrAVXInstruction(ins));
    assert(IsThreeOperandAVXInstruction(ins));

    // Static always need relocs
    if (!jitStaticFldIsGlobAddr(fldHnd))
    {
        attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
    }

    instrDesc* id = emitNewInstrDsp(attr, offs);

    id->idIns(ins);
    id->idInsFmt(IF_RWR_RRD_MRD);
    id->idReg1(reg1);
    id->idReg2(reg2);
    id->idAddr()->iiaFieldHnd = fldHnd;

    // emitInsSizeCV takes the descriptor as input, so it must only be called once the
    // instruction and field handle have been stored in it. Sizing a freshly allocated
    // descriptor would estimate from unset fields.
    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
    id->idCodeSize(sz);

    dispIns(id);
    emitCurIGsize += sz;
}
3969+
38923970
/*****************************************************************************
38933971
*
38943972
* Add an instruction with three register operands.
@@ -3915,6 +3993,30 @@ void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg,
39153993
emitCurIGsize += sz;
39163994
}
39173995

3996+
//------------------------------------------------------------------------
// emitIns_R_R_S: Add a three-operand SSE/AVX instruction whose operands are
//                a destination register, a source register and a stack
//                (local variable) memory operand.
//
// Arguments:
//    ins  -- The instruction being emitted; must be a three-operand AVX instruction
//    attr -- The emit attribute passed to the descriptor allocation
//    reg1 -- The destination register (written)
//    reg2 -- The first source register (read)
//    varx -- The local variable number of the memory operand
//    offs -- The offset into the local variable
//
// Notes:
//    The instruction is recorded with the IF_RWR_RRD_SRD format.
//
void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs)
{
    assert(IsSSEOrAVXInstruction(ins));
    assert(IsThreeOperandAVXInstruction(ins));

    instrDesc* id = emitNewInstr(attr);

    // Total size is the stack-operand form of the instruction plus the VEX prefix adjustment.
    UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs);
    sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));

    // Operation and register operands.
    id->idIns(ins);
    id->idInsFmt(IF_RWR_RRD_SRD);
    id->idReg1(reg1);
    id->idReg2(reg2);

    // Stack operand.
    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);

#ifdef DEBUG
    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
#endif

    id->idCodeSize(sz);
    dispIns(id);
    emitCurIGsize += sz;
}
4019+
39184020
/**********************************************************************************
39194021
* emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
39204022
*
@@ -4888,9 +4990,49 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
48884990
}
48894991

48904992
#if FEATURE_HW_INTRINSICS
4993+
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_A: Emit "reg = ins(reg1, [address mode])" for a SIMD
//                     operation, choosing the encoding based on VEX support.
//
// Arguments:
//    ins      -- The instruction being emitted
//    reg      -- The destination register
//    reg1     -- The first source register
//    baseReg  -- The base register of the address mode
//    indxReg  -- The index register of the address mode
//    scale    -- The address-mode scale
//    offs     -- The address-mode displacement
//    simdtype -- The SIMD type; its size is used as the emit attribute
//
// Notes:
//    With VEX encoding the three-operand form is emitted directly. Otherwise
//    reg1 is first copied into reg with movaps (unless they are already the
//    same register) and the two-operand form is emitted against reg.
//
void emitter::emitIns_SIMD_R_R_A(instruction ins,
                                 regNumber   reg,
                                 regNumber   reg1,
                                 regNumber   baseReg,
                                 regNumber   indxReg,
                                 size_t      scale,
                                 ssize_t     offs,
                                 var_types   simdtype)
{
    if (UseVEXEncoding())
    {
        // Non-destructive three-operand encoding.
        emitIns_R_R_A(ins, emitTypeSize(simdtype), reg, reg1, baseReg, indxReg, scale, offs);
        return;
    }

    // Two-operand encoding: the destination doubles as the first source.
    if (reg != reg1)
    {
        emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
    }

    emitIns_R_A(ins, emitTypeSize(simdtype), reg, baseReg, indxReg, scale, offs);
}
5015+
5016+
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_C: Emit "reg = ins(reg1, [static field])" for a SIMD
//                     operation, choosing the encoding based on VEX support.
//
// Arguments:
//    ins      -- The instruction being emitted
//    reg      -- The destination register
//    reg1     -- The first source register
//    fldHnd   -- The handle of the field that is read
//    offs     -- The offset forwarded to the underlying emit routine
//    simdtype -- The SIMD type; its size is used as the emit attribute
//
// Notes:
//    With VEX encoding the three-operand form is emitted directly. Otherwise
//    reg1 is first copied into reg with movaps (unless they are already the
//    same register) and the two-operand form is emitted against reg.
//
void emitter::emitIns_SIMD_R_R_C(
    instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype)
{
    if (UseVEXEncoding())
    {
        // Non-destructive three-operand encoding.
        emitIns_R_R_C(ins, emitTypeSize(simdtype), reg, reg1, fldHnd, offs);
        return;
    }

    // Two-operand encoding: the destination doubles as the first source.
    if (reg != reg1)
    {
        emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
    }

    emitIns_R_C(ins, emitTypeSize(simdtype), reg, fldHnd, offs);
}
5032+
48915033
void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype)
48925034
{
4893-
if (UseVEXEncoding() && reg1 != reg)
5035+
if (UseVEXEncoding())
48945036
{
48955037
emitIns_R_R_R(ins, emitTypeSize(simdtype), reg, reg1, reg2);
48965038
}
@@ -4903,6 +5045,22 @@ void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1,
49035045
emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg2);
49045046
}
49055047
}
5048+
5049+
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_S: Emit "reg = ins(reg1, [stack local])" for a SIMD
//                     operation, choosing the encoding based on VEX support.
//
// Arguments:
//    ins      -- The instruction being emitted
//    reg      -- The destination register
//    reg1     -- The first source register
//    varx     -- The local variable number of the memory operand
//    offs     -- The offset into the local variable
//    simdtype -- The SIMD type; its size is used as the emit attribute
//
// Notes:
//    With VEX encoding the three-operand form is emitted directly. Otherwise
//    reg1 is first copied into reg with movaps (unless they are already the
//    same register) and the two-operand form is emitted against reg.
//
void emitter::emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype)
{
    if (UseVEXEncoding())
    {
        // Non-destructive three-operand encoding.
        emitIns_R_R_S(ins, emitTypeSize(simdtype), reg, reg1, varx, offs);
        return;
    }

    // Two-operand encoding: the destination doubles as the first source.
    if (reg != reg1)
    {
        emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
    }

    emitIns_R_S(ins, emitTypeSize(simdtype), reg, varx, offs);
}
49065064
#endif
49075065

49085066
/*****************************************************************************
@@ -6918,6 +7076,11 @@ void emitter::emitDispIns(
69187076
emitDispAddrMode(id);
69197077
break;
69207078

7079+
case IF_RWR_RRD_ARD:
7080+
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7081+
emitDispAddrMode(id);
7082+
break;
7083+
69217084
case IF_ARD_RRD:
69227085
case IF_AWR_RRD:
69237086
case IF_ARW_RRD:
@@ -7061,6 +7224,12 @@ void emitter::emitDispIns(
70617224

70627225
break;
70637226

7227+
case IF_RWR_RRD_SRD:
7228+
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7229+
emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
7230+
id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
7231+
break;
7232+
70647233
case IF_RRD_RRD:
70657234
case IF_RWR_RRD:
70667235
case IF_RRW_RRD:
@@ -7189,6 +7358,12 @@ void emitter::emitDispIns(
71897358
emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
71907359
break;
71917360

7361+
case IF_RWR_RRD_MRD:
7362+
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7363+
offs = emitGetInsDsp(id);
7364+
emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
7365+
break;
7366+
71927367
case IF_RWR_MRD_OFF:
71937368

71947369
printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
@@ -7635,12 +7810,17 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
76357810
// Therefore, add VEX prefix if one is not already present.
76367811
code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
76377812

7638-
// For this format, moves do not support a third operand, so we only need to handle the binary ops.
76397813
if (IsDstDstSrcAVXInstruction(ins))
76407814
{
7641-
// Encode source operand reg in 'vvvv' bits in 1's complement form
7642-
// The order of operands are reversed, therefore use reg2 as the source.
7643-
code = insEncodeReg3456(ins, id->idReg1(), size, code);
7815+
regNumber src1 = id->idReg2();
7816+
7817+
if (id->idInsFmt() != IF_RWR_RRD_ARD)
7818+
{
7819+
src1 = id->idReg1();
7820+
}
7821+
7822+
// encode source operand reg in 'vvvv' bits in 1's complement form
7823+
code = insEncodeReg3456(ins, src1, size, code);
76447824
}
76457825

76467826
// Emit the REX prefix if required
@@ -10988,6 +11168,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1098811168
case IF_RRD_ARD:
1098911169
case IF_RWR_ARD:
1099011170
case IF_RRW_ARD:
11171+
case IF_RWR_RRD_ARD:
1099111172
code = insCodeRM(ins);
1099211173
code = AddVexPrefixIfNeeded(ins, code, size);
1099311174
regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
@@ -11082,6 +11263,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1108211263
case IF_RRD_SRD:
1108311264
case IF_RWR_SRD:
1108411265
case IF_RRW_SRD:
11266+
case IF_RWR_RRD_SRD:
1108511267
code = insCodeRM(ins);
1108611268

1108711269
// 4-byte AVX instructions are special cased inside emitOutputSV
@@ -11094,16 +11276,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1109411276
{
1109511277
code = AddVexPrefixIfNeeded(ins, code, size);
1109611278

11097-
// In case of AVX instructions that take 3 operands, encode reg1 as first source.
11098-
// Note that reg1 is both a source and a destination.
11099-
//
11100-
// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
11101-
// now we use the single source as source1 and source2.
11102-
// For this format, moves do not support a third operand, so we only need to handle the binary ops.
1110311279
if (IsDstDstSrcAVXInstruction(ins))
1110411280
{
11281+
regNumber src1 = id->idReg2();
11282+
11283+
if (id->idInsFmt() != IF_RWR_RRD_SRD)
11284+
{
11285+
src1 = id->idReg1();
11286+
}
11287+
1110511288
// encode source operand reg in 'vvvv' bits in 1's complement form
11106-
code = insEncodeReg3456(ins, id->idReg1(), size, code);
11289+
code = insEncodeReg3456(ins, src1, size, code);
1110711290
}
1110811291

1110911292
regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
@@ -11165,6 +11348,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1116511348
case IF_RRD_MRD:
1116611349
case IF_RWR_MRD:
1116711350
case IF_RRW_MRD:
11351+
case IF_RWR_RRD_MRD:
1116811352
code = insCodeRM(ins);
1116911353
// Special case 4-byte AVX instructions
1117011354
if (Is4ByteAVXInstruction(ins))
@@ -11175,16 +11359,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1117511359
{
1117611360
code = AddVexPrefixIfNeeded(ins, code, size);
1117711361

11178-
// In case of AVX instructions that take 3 operands, encode reg1 as first source.
11179-
// Note that reg1 is both a source and a destination.
11180-
//
11181-
// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
11182-
// now we use the single source as source1 and source2.
11183-
// For this format, moves do not support a third operand, so we only need to handle the binary ops.
1118411362
if (IsDstDstSrcAVXInstruction(ins))
1118511363
{
11364+
regNumber src1 = id->idReg2();
11365+
11366+
if (id->idInsFmt() != IF_RWR_RRD_MRD)
11367+
{
11368+
src1 = id->idReg1();
11369+
}
11370+
1118611371
// encode source operand reg in 'vvvv' bits in 1's complement form
11187-
code = insEncodeReg3456(ins, id->idReg1(), size, code);
11372+
code = insEncodeReg3456(ins, src1, size, code);
1118811373
}
1118911374

1119011375
regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);

0 commit comments

Comments
 (0)