Skip to content
Permalink
Browse files
[WebAssembly SIMD] Support floating-point absolute value, negate, min, max, and square-root on Intel

https://bugs.webkit.org/show_bug.cgi?id=249275
rdar://103326365

Reviewed by Yusuke Suzuki.

Adds support for missing floating-point opcodes to our WebAssembly SIMD support on Intel, with some
fixes to bugs that came up running the spec tests.

* Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::compareFloatingPointVectorUnordered):
(JSC::MacroAssemblerX86_64::vectorMax):
(JSC::MacroAssemblerX86_64::vectorMin):
(JSC::MacroAssemblerX86_64::vectorPmin):
(JSC::MacroAssemblerX86_64::vectorPmax):
(JSC::MacroAssemblerX86_64::vectorTruncSatSignedFloat64):
(JSC::MacroAssemblerX86_64::vectorSqrt):
(JSC::MacroAssemblerX86_64::vectorSshr8):
(JSC::MacroAssemblerX86_64::vectorUshr8):
(JSC::MacroAssemblerX86_64::vectorSshr):
* Source/JavaScriptCore/assembler/X86Assembler.h:
(JSC::X86Assembler::vmaxps_rrr):
(JSC::X86Assembler::vmaxpd_rrr):
(JSC::X86Assembler::vminps_rrr):
(JSC::X86Assembler::vminpd_rrr):
(JSC::X86Assembler::vsqrtps_rr):
(JSC::X86Assembler::vsqrtpd_rr):
(JSC::X86Assembler::vminpd_mrr):
(JSC::X86Assembler::vpsrlw_i8rr):
(JSC::X86Assembler::vpsrld_i8rr):
(JSC::X86Assembler::vpsrlq_i8rr):
(JSC::X86Assembler::vpsraw_i8rr):
(JSC::X86Assembler::vpsrad_i8rr):
(JSC::X86Assembler::vcmpeqpd_rrr): Deleted.
(JSC::X86Assembler::vpsraq_rrr): Deleted.
* Source/JavaScriptCore/b3/air/AirLowerMacros.cpp:
(JSC::B3::Air::lowerMacros):
* Source/JavaScriptCore/b3/air/AirOpcode.opcodes:
* Source/JavaScriptCore/wasm/WasmAirIRGenerator64.cpp:
(JSC::Wasm::AirIRGenerator64::addSIMDV_V):
(JSC::Wasm::AirIRGenerator64::addSIMDShift):

Canonical link: https://commits.webkit.org/257880@main
  • Loading branch information
David Degazio committed Dec 14, 2022
1 parent e0227fc commit 3a6ea5eb8b9b6395b6ee4fe11afb07861cea519c
Show file tree
Hide file tree
Showing 5 changed files with 341 additions and 66 deletions.
@@ -2418,6 +2418,19 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

DEFINE_SIGNED_SIMD_FUNCS(vectorExtractLane);

void compareFloatingPointVectorUnordered(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    // Emits a packed floating-point compare with the Unordered predicate,
    // writing the result to dest. f32x4 lanes use vcmpps; any other lane that
    // passes the floating-point check uses vcmppd. Both preconditions (AVX
    // available, floating-point lane) are release-asserted.
    RELEASE_ASSERT(supportsAVXForSIMD());
    RELEASE_ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));

    constexpr auto unordered = X86Assembler::PackedCompareCondition::Unordered;

    // Note the operand order: right is handed to the assembler ahead of left.
    if (simdInfo.lane != SIMDLane::f32x4) {
        m_assembler.vcmppd_rrr(unordered, right, left, dest);
        return;
    }
    m_assembler.vcmpps_rrr(unordered, right, left, dest);
}

void compareFloatingPointVector(DoubleCondition cond, SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
RELEASE_ASSERT(supportsAVXForSIMD());
@@ -2745,8 +2758,6 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

void vectorMax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
ASSERT(simdInfo.signMode != SIMDSignMode::None);

switch (simdInfo.lane) {
case SIMDLane::i8x16:
if (supportsAVXForSIMD()) {
@@ -2803,15 +2814,17 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
m_assembler.pmaxud_rr(right, dest);
}
return;
case SIMDLane::f32x4:
case SIMDLane::f64x2:
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Should have expanded f32x4/f64x2 maximum before reaching macro assembler.");
break;
default:
RELEASE_ASSERT_NOT_REACHED();
}
}

void vectorMin(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
ASSERT(simdInfo.signMode != SIMDSignMode::None);

switch (simdInfo.lane) {
case SIMDLane::i8x16:
if (supportsAVXForSIMD()) {
@@ -2868,23 +2881,33 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
m_assembler.pminud_rr(right, dest);
}
return;
case SIMDLane::f32x4:
case SIMDLane::f64x2:
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Should have expanded f32x4/f64x2 minimum before reaching macro assembler.");
break;
default:
RELEASE_ASSERT_NOT_REACHED();
}
}

void vectorPmin(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest, FPRegisterID)
void vectorPmin(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
// Emits a packed floating-point minimum into dest: vminps for f32x4 lanes,
// vminpd for any other lane. AVX availability is release-asserted and the
// lane kind is checked with ASSERT. Note that right is handed to the
// assembler ahead of left.
// NOTE(review): two signatures and an UNUSED_PARAM line sit next to code that
// uses every named parameter; this looks like before/after residue of one
// change. Confirm which lines are live.
RELEASE_ASSERT(supportsAVXForSIMD());
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
// right > left, dest = left
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
if (simdInfo.lane == SIMDLane::f32x4)
m_assembler.vminps_rrr(right, left, dest);
else
m_assembler.vminpd_rrr(right, left, dest);
}

void vectorPmax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest, FPRegisterID)
void vectorPmax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
// Emits a packed floating-point maximum into dest: vmaxps for f32x4 lanes,
// vmaxpd for any other lane. AVX availability is release-asserted and the
// lane kind is checked with ASSERT. Note that right is handed to the
// assembler ahead of left.
// NOTE(review): two signatures and an UNUSED_PARAM line sit next to code that
// uses every named parameter; this looks like before/after residue of one
// change. Confirm which lines are live.
RELEASE_ASSERT(supportsAVXForSIMD());
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
// left > right, dest = left
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
if (simdInfo.lane == SIMDLane::f32x4)
m_assembler.vmaxps_rrr(right, left, dest);
else
m_assembler.vmaxpd_rrr(right, left, dest);
}

void vectorBitwiseSelect(FPRegisterID left, FPRegisterID right, FPRegisterID inputBitsAndDest)
@@ -3029,7 +3052,10 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
2147483647.0,
2147483647.0,
};
m_assembler.vcmpeqpd_rrr(src, src, scratchFPR);

using PackedCompareCondition = X86Assembler::PackedCompareCondition;

m_assembler.vcmppd_rrr(PackedCompareCondition::Equal, src, src, scratchFPR);
move(TrustedImmPtr(masks), scratchGPR);
m_assembler.vandpd_mrr(0, scratchGPR, scratchFPR, scratchFPR);
m_assembler.vminpd_rrr(scratchFPR, src, dest);
@@ -3069,8 +3095,12 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

void vectorSqrt(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
    // Emits a packed square root of input into dest: vsqrtps for f32x4 lanes,
    // vsqrtpd for any other lane. AVX availability is release-asserted; the
    // lane kind is checked with ASSERT.
    //
    // Every parameter is used below, so no UNUSED_PARAM markers are needed.
    RELEASE_ASSERT(supportsAVXForSIMD());
    ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
    if (simdInfo.lane == SIMDLane::f32x4)
        m_assembler.vsqrtps_rr(input, dest);
    else
        m_assembler.vsqrtpd_rr(input, dest);
}

void vectorExtendLow(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
@@ -3272,6 +3302,46 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
}
}

void vectorSshr8(SIMDInfo simdInfo, FPRegisterID input, TrustedImm32 shift, FPRegisterID dest)
{
    // Signed (arithmetic) right shift of every lane of input by an immediate
    // count, written to dest. Only i16x8 and i32x4 emit an instruction; i8x16
    // is rejected up front and i64x2 hits a release assertion.
    RELEASE_ASSERT(scalarTypeIsIntegral(simdInfo.lane));
    RELEASE_ASSERT(simdInfo.lane != SIMDLane::i8x16);
    RELEASE_ASSERT(supportsAVXForSIMD());

    if (simdInfo.lane == SIMDLane::i16x8) {
        m_assembler.vpsraw_i8rr(shift.m_value, input, dest);
        return;
    }
    if (simdInfo.lane == SIMDLane::i32x4) {
        m_assembler.vpsrad_i8rr(shift.m_value, input, dest);
        return;
    }
    if (simdInfo.lane == SIMDLane::i64x2) {
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("i64x2 signed shift right is not supported natively on Intel.");
        return;
    }
    RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid lane kind for signed vector right shift.");
}

void vectorUshr8(SIMDInfo simdInfo, FPRegisterID input, TrustedImm32 shift, FPRegisterID dest)
{
    // Unsigned (logical) right shift of every lane of input by an immediate
    // count, written to dest. i16x8, i32x4 and i64x2 each map to one
    // instruction; i8x16 is rejected up front.
    RELEASE_ASSERT(scalarTypeIsIntegral(simdInfo.lane));
    RELEASE_ASSERT(simdInfo.lane != SIMDLane::i8x16);
    RELEASE_ASSERT(supportsAVXForSIMD());

    if (simdInfo.lane == SIMDLane::i16x8) {
        m_assembler.vpsrlw_i8rr(shift.m_value, input, dest);
        return;
    }
    if (simdInfo.lane == SIMDLane::i32x4) {
        m_assembler.vpsrld_i8rr(shift.m_value, input, dest);
        return;
    }
    if (simdInfo.lane == SIMDLane::i64x2) {
        m_assembler.vpsrlq_i8rr(shift.m_value, input, dest);
        return;
    }
    RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid lane kind for unsigned vector right shift.");
}

void vectorUshr(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID shift, FPRegisterID dest)
{
ASSERT(scalarTypeIsIntegral(simdInfo.lane));
@@ -3309,8 +3379,7 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
m_assembler.vpsrad_rrr(shift, input, dest);
break;
case SIMDLane::i64x2:
// FIXME: This is AVX-512, and not implemented correctly right now.
m_assembler.vpsraq_rrr(shift, input, dest);
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("i64x2 signed shift right is not supported natively on Intel.");
break;
default:
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid lane kind for unsigned vector right shift.");
@@ -304,9 +304,11 @@ class X86Assembler {
OP2_CVTPD2PS_VsdWsd = 0x5A,
OP2_CVTDQ2PS_VsdWsd = 0x5B,
OP2_SUBSD_VsdWsd = 0x5C,
OP2_MINPD_VsdWsd = 0x5D,
OP2_MINPS_VpsWps = 0x5D,
OP2_MINPD_VpdWpd = 0x5D,
OP2_DIVSD_VsdWsd = 0x5E,
OP2_MAXPD_VsdWsd = 0x5F,
OP2_MAXPS_VpsWps = 0x5F,
OP2_MAXPD_VpdWpd = 0x5F,
OP2_PACKSSWB_VdqWdq = 0x63,
OP2_PACKUSWB_VdqWdq = 0x67,
OP2_PACKSSDW_VdqWdq = 0x6B,
@@ -317,7 +319,10 @@ class X86Assembler {
OP2_PSHUFD_VdqWdqIb = 0x70,
OP2_PSHUFLW_VdqWdqIb = 0x70,
OP2_PSHUFHW_VdqWdqIb = 0x70,
OP2_PSRLW_UdqIb = 0x71,
OP2_PSRAW_UdqIb = 0x71,
OP2_PSRLD_UdqIb = 0x72,
OP2_PSRAD_UdqIb = 0x72,
OP2_PSLLQ_UdqIb = 0x73,
OP2_PSRLQ_UdqIb = 0x73,
OP2_VZEROUPPER = 0x77,
@@ -402,7 +407,6 @@ class X86Assembler {
OP2_PSRLQ_VdqWdq = 0xD3,
OP2_PSRAW_VdqWdq = 0xE1,
OP2_PSRAD_VdqWdq = 0xE2,
OP2_PSRAQ_VdqWdq = 0xE3,
OP2_PSLLW_VdqWdq = 0xF1,
OP2_PSLLD_VdqWdq = 0xF2,
OP2_PSLLQ_VdqWdq = 0xF3,
@@ -524,6 +528,7 @@ class X86Assembler {
GROUP11_MOV = 0,

GROUP14_OP_PSLLQ = 6,
GROUP14_OP_PSRAQ = 4,
GROUP14_OP_PSRLQ = 2,

ESCAPE_D9_FSTP_singleReal = 3,
@@ -4781,6 +4786,38 @@ class X86Assembler {
m_formatter.vexNdsLigWigThreeByteOp(PRE_SSE_66, VexImpliedBytes::ThreeBytesOp38, OP3_PMINUD_VdqWdq, (RegisterID)vd, (RegisterID)left, (RegisterID)right);
}

void vmaxps_rrr(XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
{
    // Packed single-precision maximum.
    //   VEX.128.NP.0F.WIG 5F /r    VMAXPS xmm1, xmm2, xmm3/m128
    //   operand encoding B: ModRM:reg (w), VEX.vvvv (r), ModRM:r/m (r)
    // Reference: https://www.felixcloutier.com/x86/maxps
    //
    // NOTE(review): the formatter receives xmm1, then xmm3, then xmm2, so the
    // parameter names do not line up one-to-one with the operand names in the
    // encoding line above (presumably the second slot is VEX.vvvv; confirm).
    const auto destination = (RegisterID)xmm1;
    const auto secondArg = (RegisterID)xmm3;
    const auto thirdArg = (RegisterID)xmm2;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_00, OP2_MAXPS_VpsWps, destination, secondArg, thirdArg);
}

void vmaxpd_rrr(XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
{
    // Packed double-precision maximum.
    //   VEX.128.66.0F.WIG 5F /r    VMAXPD xmm1, xmm2, xmm3/m128
    //   operand encoding B: ModRM:reg (w), VEX.vvvv (r), ModRM:r/m (r)
    // Reference: https://www.felixcloutier.com/x86/maxpd
    //
    // NOTE(review): the formatter receives xmm1, then xmm3, then xmm2, so the
    // parameter names do not line up one-to-one with the operand names in the
    // encoding line above (presumably the second slot is VEX.vvvv; confirm).
    const auto destination = (RegisterID)xmm1;
    const auto secondArg = (RegisterID)xmm3;
    const auto thirdArg = (RegisterID)xmm2;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_MAXPD_VpdWpd, destination, secondArg, thirdArg);
}

void vminps_rrr(XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
{
    // Packed single-precision minimum.
    //   VEX.128.NP.0F.WIG 5D /r    VMINPS xmm1, xmm2, xmm3/m128
    //   operand encoding B: ModRM:reg (w), VEX.vvvv (r), ModRM:r/m (r)
    // Reference: https://www.felixcloutier.com/x86/minps
    //
    // NOTE(review): the formatter receives xmm1, then xmm3, then xmm2, so the
    // parameter names do not line up one-to-one with the operand names in the
    // encoding line above (presumably the second slot is VEX.vvvv; confirm).
    const auto destination = (RegisterID)xmm1;
    const auto secondArg = (RegisterID)xmm3;
    const auto thirdArg = (RegisterID)xmm2;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_00, OP2_MINPS_VpsWps, destination, secondArg, thirdArg);
}

void vminpd_rrr(XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
{
    // Packed double-precision minimum.
    //   VEX.128.66.0F.WIG 5D /r    VMINPD xmm1, xmm2, xmm3/m128
    //   operand encoding B: ModRM:reg (w), VEX.vvvv (r), ModRM:r/m (r)
    // Reference: https://www.felixcloutier.com/x86/minpd
    //
    // NOTE(review): the formatter receives xmm1, then xmm3, then xmm2, so the
    // parameter names do not line up one-to-one with the operand names in the
    // encoding line above (presumably the second slot is VEX.vvvv; confirm).
    const auto destination = (RegisterID)xmm1;
    const auto secondArg = (RegisterID)xmm3;
    const auto thirdArg = (RegisterID)xmm2;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_MINPD_VpdWpd, destination, secondArg, thirdArg);
}

void vpavgb_rrr(XMMRegisterID right, XMMRegisterID left, XMMRegisterID vd)
{
// https://www.felixcloutier.com/x86/pavgb:pavgw
@@ -5085,6 +5122,22 @@ class X86Assembler {
m_formatter.immediate8(static_cast<uint8_t>(rounding));
}

void vsqrtps_rr(XMMRegisterID src, XMMRegisterID dest)
{
    // Packed single-precision square root of src, written to dest.
    //   VEX.128.0F.WIG 51 /r    VSQRTPS xmm1, xmm2/m128
    //   operand encoding A: ModRM:reg (w), ModRM:r/m (r)
    // Reference: https://www.felixcloutier.com/x86/sqrtps
    //
    // This is a two-operand instruction, so a zero register id is passed in
    // the formatter's middle register slot.
    const auto destination = (RegisterID)dest;
    const auto unusedMiddle = (RegisterID)0;
    const auto source = (RegisterID)src;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_00, OP2_SQRTPS_VpsWps, destination, unusedMiddle, source);
}

void vsqrtpd_rr(XMMRegisterID src, XMMRegisterID dest)
{
    // Packed double-precision square root of src, written to dest.
    //   VEX.128.66.0F.WIG 51 /r    VSQRTPD xmm1, xmm2/m128
    //   operand encoding A: ModRM:reg (w), ModRM:r/m (r)
    // Reference: https://www.felixcloutier.com/x86/sqrtpd
    //
    // This is a two-operand instruction, so a zero register id is passed in
    // the formatter's middle register slot.
    const auto destination = (RegisterID)dest;
    const auto unusedMiddle = (RegisterID)0;
    const auto source = (RegisterID)src;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_SQRTPD_VpdWpd, destination, unusedMiddle, source);
}

void vpmaddwd_rrr(FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
// https://www.felixcloutier.com/x86/pmaddwd
@@ -5197,28 +5250,12 @@ class X86Assembler {
m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F3, OP2_CVTDQ2PD_VdqWdq, (RegisterID)vd, (RegisterID)0, (RegisterID)vn);
}

void vmaxpd_rrr(XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
{
    // Packed double-precision maximum.
    //   VEX.128.66.0F.WIG 5F /r    VMAXPD xmm1, xmm2, xmm3/m128
    //   operand encoding B: ModRM:reg (w), VEX.vvvv (r), ModRM:r/m (r)
    // Reference: https://www.felixcloutier.com/x86/maxpd
    //
    // The registers are forwarded to the formatter as xmm1, xmm2, xmm3, in
    // the same order as the operands are listed in the encoding line above.
    const auto destination = (RegisterID)xmm1;
    const auto secondArg = (RegisterID)xmm2;
    const auto thirdArg = (RegisterID)xmm3;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_MAXPD_VsdWsd, destination, secondArg, thirdArg);
}

void vminpd_rrr(XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
{
    // Packed double-precision minimum.
    //   VEX.128.66.0F.WIG 5D /r    VMINPD xmm1, xmm2, xmm3/m128
    //   operand encoding B: ModRM:reg (w), VEX.vvvv (r), ModRM:r/m (r)
    // Reference: https://www.felixcloutier.com/x86/minpd
    //
    // The registers are forwarded to the formatter as xmm1, xmm2, xmm3, in
    // the same order as the operands are listed in the encoding line above.
    const auto destination = (RegisterID)xmm1;
    const auto secondArg = (RegisterID)xmm2;
    const auto thirdArg = (RegisterID)xmm3;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_MINPD_VsdWsd, destination, secondArg, thirdArg);
}

void vminpd_mrr(int offset, RegisterID base, XMMRegisterID src2, XMMRegisterID dest)
{
    // Packed double-precision minimum with a memory operand.
    // https://www.felixcloutier.com/x86/minpd
    // VEX.128.66.0F.WIG 5D /r VMINPD xmm1, xmm2, xmm3/m128
    // B NA ModRM:reg (w) VEX.vvvv (r) ModRM:r/m (r) NA
    //
    // dest and src2 are forwarded as registers; base and offset are forwarded
    // to the formatter's memory-operand overload (presumably addressing
    // base + offset; confirm against the formatter).
    //
    // The instruction must be emitted exactly once. It uses the packed-double
    // opcode name, matching the register-register vminpd emitter.
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_MINPD_VpdWpd, (RegisterID)dest, (RegisterID)src2, base, offset);
}

void vcmppd_rrr(uint8_t imm8, XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
@@ -5230,10 +5267,49 @@ class X86Assembler {
m_formatter.immediate8(imm8);
}

void vcmpeqpd_rrr(XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
void vpsrlw_i8rr(uint8_t shift, XMMRegisterID src, XMMRegisterID dst)
{
// NOTE(review): this span carries two signatures and a vcmppd_rrr call that
// names xmm1/xmm2/xmm3, which only the first signature declares. It looks
// like a vcmpeqpd_rrr helper interleaved with vpsrlw_i8rr; confirm which
// lines are live.
// https://www.felixcloutier.com/x86/cmppd
vcmppd_rrr(0, xmm3, xmm2, xmm1);
// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
// VEX.128.66.0F.WIG 71 /2 ib VPSRLW xmm1, xmm2, imm8
// D NA VEX.vvvv (w) ModRM:r/m (r) imm8 NA
// GROUP14_OP_PSRLQ is passed in the first register slot (presumably to
// supply the /2 opcode extension; confirm), followed by dst and src. The
// shift count is then appended as a one-byte immediate.
m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_PSRLW_UdqIb, (RegisterID)GROUP14_OP_PSRLQ, (RegisterID)dst, (RegisterID)src);
m_formatter.immediate8(shift);
}

void vpsrld_i8rr(uint8_t shift, XMMRegisterID src, XMMRegisterID dst)
{
    // Logical right shift of packed 32-bit lanes by an immediate count.
    //   VEX.128.66.0F.WIG 72 /2 ib    VPSRLD xmm1, xmm2, imm8
    //   operand encoding D: VEX.vvvv (w), ModRM:r/m (r), imm8
    // Reference: https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
    //
    // The group constant goes in the formatter's first register slot
    // (presumably supplying the /2 opcode extension; confirm).
    const auto opcodeExtension = (RegisterID)GROUP14_OP_PSRLQ;
    const auto destination = (RegisterID)dst;
    const auto source = (RegisterID)src;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_PSRLD_UdqIb, opcodeExtension, destination, source);
    // The shift count trails the instruction as a one-byte immediate.
    m_formatter.immediate8(shift);
}

void vpsrlq_i8rr(uint8_t shift, XMMRegisterID src, XMMRegisterID dst)
{
    // Logical right shift of packed 64-bit lanes by an immediate count.
    //   VEX.128.66.0F.WIG 73 /2 ib    VPSRLQ xmm1, xmm2, imm8
    //   operand encoding D: VEX.vvvv (w), ModRM:r/m (r), imm8
    // Reference: https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
    //
    // The group constant goes in the formatter's first register slot
    // (presumably supplying the /2 opcode extension; confirm).
    const auto opcodeExtension = (RegisterID)GROUP14_OP_PSRLQ;
    const auto destination = (RegisterID)dst;
    const auto source = (RegisterID)src;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_PSRLQ_UdqIb, opcodeExtension, destination, source);
    // The shift count trails the instruction as a one-byte immediate.
    m_formatter.immediate8(shift);
}

void vpsraw_i8rr(uint8_t shift, XMMRegisterID src, XMMRegisterID dst)
{
    // Arithmetic right shift of packed 16-bit lanes by an immediate count.
    //   VEX.128.66.0F.WIG 71 /4 ib    VPSRAW xmm1, xmm2, imm8
    //   operand encoding D: VEX.vvvv (w), ModRM:r/m (r), imm8
    // Reference: https://www.felixcloutier.com/x86/psraw:psrad:psraq
    //
    // The group constant goes in the formatter's first register slot
    // (presumably supplying the /4 opcode extension; confirm).
    const auto opcodeExtension = (RegisterID)GROUP14_OP_PSRAQ;
    const auto destination = (RegisterID)dst;
    const auto source = (RegisterID)src;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_PSRAW_UdqIb, opcodeExtension, destination, source);
    // The shift count trails the instruction as a one-byte immediate.
    m_formatter.immediate8(shift);
}

void vpsrad_i8rr(uint8_t shift, XMMRegisterID src, XMMRegisterID dst)
{
    // Arithmetic right shift of packed 32-bit lanes by an immediate count.
    //   VEX.128.66.0F.WIG 72 /4 ib    VPSRAD xmm1, xmm2, imm8
    //   operand encoding D: VEX.vvvv (w), ModRM:r/m (r), imm8
    // Reference: https://www.felixcloutier.com/x86/psraw:psrad:psraq
    //
    // The group constant goes in the formatter's first register slot
    // (presumably supplying the /4 opcode extension; confirm).
    const auto opcodeExtension = (RegisterID)GROUP14_OP_PSRAQ;
    const auto destination = (RegisterID)dst;
    const auto source = (RegisterID)src;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_PSRAD_UdqIb, opcodeExtension, destination, source);
    // The shift count trails the instruction as a one-byte immediate.
    m_formatter.immediate8(shift);
}

void vcvttpd2dq_rr(XMMRegisterID xmm2, XMMRegisterID xmm1)
@@ -5540,16 +5616,6 @@ class X86Assembler {
m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_PSRAD_VdqWdq, (RegisterID)dest, (RegisterID)input, (RegisterID)shift);
}

void vpsraq_rrr(XMMRegisterID shift, XMMRegisterID input, XMMRegisterID dest)
{
    // Placeholder: emits no instruction; every parameter is only marked unused.
    // https://www.felixcloutier.com/x86/psraw:psrad:psraq
    // FIXME: This is AVX-512, and the following is not correct format.
    // m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_PSRAQ_VdqWdq, (RegisterID)dest, (RegisterID)input, (RegisterID)shift);
    UNUSED_PARAM(dest);
    UNUSED_PARAM(input);
    UNUSED_PARAM(shift);
}

void vmovd_rr(RegisterID src, XMMRegisterID dest)
{
// https://www.felixcloutier.com/x86/movd:movq
@@ -5598,15 +5664,6 @@ class X86Assembler {
m_formatter.vexNdsLigWigThreeByteOp(PRE_SSE_66, VexImpliedBytes::ThreeBytesOp38, OP3_PMADDUBSW_VpdWpd, (RegisterID)xmm1, (RegisterID)xmm2, (RegisterID)xmm3);
}

void vpsrld_i8rr(uint8_t imm8, XMMRegisterID vn, XMMRegisterID vd)
{
    // Logical right shift of packed 32-bit lanes by an immediate count.
    //   VEX.128.66.0F.WIG 72 /2 ib    VPSRLD xmm1, xmm2, imm8
    //   operand encoding D: VEX.vvvv (w), ModRM:r/m (r), imm8
    // Reference: https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
    //
    // The group constant goes in the formatter's first register slot
    // (presumably supplying the /2 opcode extension; confirm).
    const auto opcodeExtension = (RegisterID)GROUP14_OP_PSRLQ;
    const auto destination = (RegisterID)vd;
    const auto source = (RegisterID)vn;
    m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_66, OP2_PSRLD_UdqIb, opcodeExtension, destination, source);
    // The shift count trails the instruction as a one-byte immediate.
    m_formatter.immediate8(imm8);
}

void vpblendw_i8rrr(uint8_t imm8, XMMRegisterID xmm3, XMMRegisterID xmm2, XMMRegisterID xmm1)
{
// https://www.felixcloutier.com/x86/pblendw

0 comments on commit 3a6ea5e

Please sign in to comment.