Skip to content
Permalink
Browse files
[SIMD] Intel support for saturating integer arithmetic operations
https://bugs.webkit.org/show_bug.cgi?id=248862
rdar://103061350

Reviewed by Yusuke Suzuki.

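Add x86_64 (Intel) code generation for the WASM SIMD saturating integer arithmetic operations: lane-wise min/max, abs, saturating add/sub, rounding average, and saturating multiply.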
https://github.com/WebAssembly/simd/blob/main/proposals/simd/SIMD.md#saturating-integer-arithmetic

* Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::vectorMax):
(JSC::MacroAssemblerX86_64::vectorMin):
(JSC::MacroAssemblerX86_64::vectorAbs):
(JSC::MacroAssemblerX86_64::vectorAddSat):
(JSC::MacroAssemblerX86_64::vectorSubSat):
(JSC::MacroAssemblerX86_64::vectorAvgRound):
(JSC::MacroAssemblerX86_64::vectorMulSat):
* Source/JavaScriptCore/assembler/X86Assembler.h:
(JSC::X86Assembler::paddsb_rr):
(JSC::X86Assembler::vpaddsb_rr):
(JSC::X86Assembler::paddusb_rr):
(JSC::X86Assembler::vpaddusb_rr):
(JSC::X86Assembler::paddsw_rr):
(JSC::X86Assembler::vpaddsw_rr):
(JSC::X86Assembler::paddusw_rr):
(JSC::X86Assembler::vpaddusw_rr):
(JSC::X86Assembler::psubsb_rr):
(JSC::X86Assembler::vpsubsb_rr):
(JSC::X86Assembler::psubusb_rr):
(JSC::X86Assembler::vpsubusb_rr):
(JSC::X86Assembler::psubsw_rr):
(JSC::X86Assembler::vpsubsw_rr):
(JSC::X86Assembler::psubusw_rr):
(JSC::X86Assembler::vpsubusw_rr):
(JSC::X86Assembler::pmaxsb_rr):
(JSC::X86Assembler::vpmaxsb_rr):
(JSC::X86Assembler::pmaxsw_rr):
(JSC::X86Assembler::vpmaxsw_rr):
(JSC::X86Assembler::pmaxsd_rr):
(JSC::X86Assembler::vpmaxsd_rr):
(JSC::X86Assembler::pmaxub_rr):
(JSC::X86Assembler::vpmaxub_rr):
(JSC::X86Assembler::pmaxuw_rr):
(JSC::X86Assembler::vpmaxuw_rr):
(JSC::X86Assembler::pmaxud_rr):
(JSC::X86Assembler::vpmaxud_rr):
(JSC::X86Assembler::pminsb_rr):
(JSC::X86Assembler::vpminsb_rr):
(JSC::X86Assembler::pminsw_rr):
(JSC::X86Assembler::vpminsw_rr):
(JSC::X86Assembler::pminsd_rr):
(JSC::X86Assembler::vpminsd_rr):
(JSC::X86Assembler::pminub_rr):
(JSC::X86Assembler::vpminub_rr):
(JSC::X86Assembler::pminuw_rr):
(JSC::X86Assembler::vpminuw_rr):
(JSC::X86Assembler::pminud_rr):
(JSC::X86Assembler::vpminud_rr):
(JSC::X86Assembler::pavgb_rr):
(JSC::X86Assembler::vpavgb_rr):
(JSC::X86Assembler::pavgw_rr):
(JSC::X86Assembler::vpavgw_rr):
(JSC::X86Assembler::pabsb_rr):
(JSC::X86Assembler::vpabsb_rr):
(JSC::X86Assembler::pabsw_rr):
(JSC::X86Assembler::vpabsw_rr):
(JSC::X86Assembler::pabsd_rr):
(JSC::X86Assembler::vpabsd_rr):
(JSC::X86Assembler::vpxor_rr):
(JSC::X86Assembler::vpsubq_rr):
(JSC::X86Assembler::vblendvpd_rr):
(JSC::X86Assembler::vpmulhrsw_rr):
(JSC::X86Assembler::vpcmpeqw_rr):
(JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
* Source/JavaScriptCore/b3/air/AirOpcode.opcodes:
* Source/JavaScriptCore/wasm/WasmAirIRGenerator.cpp:
(JSC::Wasm::AirIRGenerator::addSIMDSwizzle):
(JSC::Wasm::AirIRGenerator::addSIMDV_VV):

Canonical link: https://commits.webkit.org/257468@main
  • Loading branch information
hyjorc1 authored and Yijia Huang committed Dec 7, 2022
1 parent a05af34 commit 5a0f6d496df12411613df873460a8b0717a65459
Show file tree
Hide file tree
Showing 5 changed files with 828 additions and 40 deletions.
@@ -3972,6 +3972,13 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<Assembler> {
return X86Assembler::patchableJumpSize();
}

// Reports whether the recorded CPU feature state marks SSE4.1 as available.
// If the state has not been determined yet, collectCPUFeatures() is run first.
static bool supportsSSE4_1()
{
    const bool notYetProbed = (CPUIDCheckState::NotChecked == s_sse4_1CheckState);
    if (notYetProbed)
        collectCPUFeatures();

    // Re-read the state: collectCPUFeatures() may have just updated it.
    const bool isAvailable = (CPUIDCheckState::Set == s_sse4_1CheckState);
    return isAvailable;
}

static bool supportsFloatingPointRounding()
{
if (s_sse4_1CheckState == CPUIDCheckState::NotChecked)
@@ -2353,29 +2353,131 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

// Emits code for a lane-wise integer maximum of `left` and `right` into `dest`.
//
// simdInfo.lane selects the lane shape (i8x16, i16x8 or i32x4; any other lane
// hits RELEASE_ASSERT_NOT_REACHED) and simdInfo.signMode selects the signed or
// unsigned instruction. With AVX the three-operand instruction is emitted.
// Otherwise `left` is copied into `dest` (when they differ) and the
// two-operand SSE instruction is applied to `dest`.
//
// NOTE(review): on the non-AVX path, if `right` is the same register as `dest`
// while `left` is not, the copy overwrites `right` before it is read. Confirm
// that callers never pass such a register assignment.
void vectorMax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    ASSERT(simdInfo.signMode != SIMDSignMode::None);

    switch (simdInfo.lane) {
    case SIMDLane::i8x16:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpmaxsb_rr(right, left, dest);
            else
                m_assembler.vpmaxub_rr(right, left, dest);
        } else {
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed) {
                // The signed byte form is only emitted when SSE4.1 is available.
                if (supportsSSE4_1())
                    m_assembler.pmaxsb_rr(right, dest);
                else
                    RELEASE_ASSERT_NOT_REACHED();
            } else
                m_assembler.pmaxub_rr(right, dest);
        }
        return;
    case SIMDLane::i16x8:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpmaxsw_rr(right, left, dest);
            else
                m_assembler.vpmaxuw_rr(right, left, dest);
        } else {
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.pmaxsw_rr(right, dest);
            else {
                // The unsigned word form is only emitted when SSE4.1 is available.
                if (supportsSSE4_1())
                    m_assembler.pmaxuw_rr(right, dest);
                else
                    RELEASE_ASSERT_NOT_REACHED();
            }
        }
        return;
    case SIMDLane::i32x4:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpmaxsd_rr(right, left, dest);
            else
                m_assembler.vpmaxud_rr(right, left, dest);
        } else {
            // PMAXSD and PMAXUD are both SSE4.1 instructions, so the feature
            // check must cover the unsigned variant as well as the signed one.
            if (!supportsSSE4_1())
                RELEASE_ASSERT_NOT_REACHED();
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.pmaxsd_rr(right, dest);
            else
                m_assembler.pmaxud_rr(right, dest);
        }
        return;
    default:
        RELEASE_ASSERT_NOT_REACHED();
    }
}

// Emits code for a lane-wise integer minimum of `left` and `right` into `dest`.
//
// simdInfo.lane selects the lane shape (i8x16, i16x8 or i32x4; any other lane
// hits RELEASE_ASSERT_NOT_REACHED) and simdInfo.signMode selects the signed or
// unsigned instruction. With AVX the three-operand instruction is emitted.
// Otherwise `left` is copied into `dest` (when they differ) and the
// two-operand SSE instruction is applied to `dest`.
//
// NOTE(review): on the non-AVX path, if `right` is the same register as `dest`
// while `left` is not, the copy overwrites `right` before it is read. Confirm
// that callers never pass such a register assignment.
void vectorMin(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    ASSERT(simdInfo.signMode != SIMDSignMode::None);

    switch (simdInfo.lane) {
    case SIMDLane::i8x16:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpminsb_rr(right, left, dest);
            else
                m_assembler.vpminub_rr(right, left, dest);
        } else {
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed) {
                // The signed byte form is only emitted when SSE4.1 is available.
                if (supportsSSE4_1())
                    m_assembler.pminsb_rr(right, dest);
                else
                    RELEASE_ASSERT_NOT_REACHED();
            } else
                m_assembler.pminub_rr(right, dest);
        }
        return;
    case SIMDLane::i16x8:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpminsw_rr(right, left, dest);
            else
                m_assembler.vpminuw_rr(right, left, dest);
        } else {
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.pminsw_rr(right, dest);
            else {
                // The unsigned word form is only emitted when SSE4.1 is available.
                if (supportsSSE4_1())
                    m_assembler.pminuw_rr(right, dest);
                else
                    RELEASE_ASSERT_NOT_REACHED();
            }
        }
        return;
    case SIMDLane::i32x4:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpminsd_rr(right, left, dest);
            else
                m_assembler.vpminud_rr(right, left, dest);
        } else {
            // PMINSD and PMINUD are both SSE4.1 instructions, so the feature
            // check must cover the unsigned variant as well as the signed one.
            if (!supportsSSE4_1())
                RELEASE_ASSERT_NOT_REACHED();
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.pminsd_rr(right, dest);
            else
                m_assembler.pminud_rr(right, dest);
        }
        return;
    default:
        RELEASE_ASSERT_NOT_REACHED();
    }
}

@@ -2430,11 +2532,48 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

// Emits code for a lane-wise integer absolute value of `input` into `dest`.
//
// i8x16, i16x8 and i32x4 use a single instruction: the AVX form when
// supportsAVX() holds, else the legacy form when supportsSupplementalSSE3()
// holds; with neither, RELEASE_ASSERT_NOT_REACHED fires. i64x2 is only
// implemented for AVX, as a three-instruction sequence. Any other lane
// (including floating-point lanes) hits RELEASE_ASSERT_NOT_REACHED.
void vectorAbs(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
    switch (simdInfo.lane) {
    case SIMDLane::i8x16:
        if (supportsAVX())
            m_assembler.vpabsb_rr(input, dest);
        else if (supportsSupplementalSSE3())
            m_assembler.pabsb_rr(input, dest);
        else
            RELEASE_ASSERT_NOT_REACHED();
        return;
    case SIMDLane::i16x8:
        if (supportsAVX())
            m_assembler.vpabsw_rr(input, dest);
        else if (supportsSupplementalSSE3())
            m_assembler.pabsw_rr(input, dest);
        else
            RELEASE_ASSERT_NOT_REACHED();
        return;
    case SIMDLane::i32x4:
        if (supportsAVX())
            m_assembler.vpabsd_rr(input, dest);
        else if (supportsSupplementalSSE3())
            m_assembler.pabsd_rr(input, dest);
        else
            RELEASE_ASSERT_NOT_REACHED();
        return;
    case SIMDLane::i64x2:
        // https://github.com/WebAssembly/simd/pull/413
        if (supportsAVX()) {
            // Zero dest, subtract input from it, then blend with input as the
            // selector operand.
            // NOTE(review): dest is cleared before input is read, so this
            // sequence yields zero if input and dest are the same register.
            // Confirm callers always pass distinct registers.
            m_assembler.vpxor_rr(dest, dest, dest);
            m_assembler.vpsubq_rr(input, dest, dest);
            m_assembler.vblendvpd_rr(input, dest, input, dest);
        } else if (supportsSSE4_1()) {
            // FIXME: SSE4_1
            RELEASE_ASSERT_NOT_REACHED();
        } else {
            // FIXME: SSE2
            RELEASE_ASSERT_NOT_REACHED();
        }
        return;
    default:
        RELEASE_ASSERT_NOT_REACHED();
    }
}

void vectorNeg(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
@@ -2605,22 +2744,82 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

// Emits code for a lane-wise saturating integer addition of `left` and
// `right` into `dest`.
//
// Only i8x16 and i16x8 lanes are handled; any other lane hits
// RELEASE_ASSERT_NOT_REACHED. simdInfo.signMode selects the signed or unsigned
// instruction. With AVX the three-operand instruction is emitted. Otherwise
// `left` is copied into `dest` (when they differ) and the two-operand SSE
// instruction is applied to `dest`.
//
// NOTE(review): on the non-AVX path, if `right` is the same register as `dest`
// while `left` is not, the copy overwrites `right` before it is read. Confirm
// that callers never pass such a register assignment.
void vectorAddSat(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    ASSERT(scalarTypeIsIntegral(simdInfo.lane));
    ASSERT(simdInfo.signMode != SIMDSignMode::None);

    switch (simdInfo.lane) {
    case SIMDLane::i8x16:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpaddsb_rr(right, left, dest);
            else
                m_assembler.vpaddusb_rr(right, left, dest);
        } else {
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.paddsb_rr(right, dest);
            else
                m_assembler.paddusb_rr(right, dest);
        }
        return;
    case SIMDLane::i16x8:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpaddsw_rr(right, left, dest);
            else
                m_assembler.vpaddusw_rr(right, left, dest);
        } else {
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.paddsw_rr(right, dest);
            else
                m_assembler.paddusw_rr(right, dest);
        }
        return;
    default:
        RELEASE_ASSERT_NOT_REACHED();
    }
}

// Emits code for a lane-wise saturating integer subtraction of `right` from
// `left` into `dest`.
//
// Only i8x16 and i16x8 lanes are handled; any other lane hits
// RELEASE_ASSERT_NOT_REACHED. simdInfo.signMode selects the signed or unsigned
// instruction. With AVX the three-operand instruction is emitted. Otherwise
// `left` is copied into `dest` (when they differ) and the two-operand SSE
// instruction is applied to `dest`.
//
// NOTE(review): on the non-AVX path, if `right` is the same register as `dest`
// while `left` is not, the copy overwrites `right` before it is read. Confirm
// that callers never pass such a register assignment.
void vectorSubSat(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    ASSERT(scalarTypeIsIntegral(simdInfo.lane));
    ASSERT(simdInfo.signMode != SIMDSignMode::None);

    switch (simdInfo.lane) {
    case SIMDLane::i8x16:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpsubsb_rr(right, left, dest);
            else
                m_assembler.vpsubusb_rr(right, left, dest);
        } else {
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.psubsb_rr(right, dest);
            else
                m_assembler.psubusb_rr(right, dest);
        }
        return;
    case SIMDLane::i16x8:
        if (supportsAVX()) {
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.vpsubsw_rr(right, left, dest);
            else
                m_assembler.vpsubusw_rr(right, left, dest);
        } else {
            if (left != dest)
                m_assembler.movapd_rr(left, dest);
            if (simdInfo.signMode == SIMDSignMode::Signed)
                m_assembler.psubsw_rr(right, dest);
            else
                m_assembler.psubusw_rr(right, dest);
        }
        return;
    default:
        RELEASE_ASSERT_NOT_REACHED();
    }
}

// Placeholder: emits no instructions and only marks its parameters as unused.
void vectorLoad8Splat(Address address, FPRegisterID dest)
{
    UNUSED_PARAM(dest);
    UNUSED_PARAM(address);
}
@@ -2637,8 +2836,51 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
// Placeholder: emits no instructions and only marks its parameters as unused.
void vectorAllTrue(SIMDInfo simdInfo, FPRegisterID vec, RegisterID dest)
{
    UNUSED_PARAM(dest);
    UNUSED_PARAM(vec);
    UNUSED_PARAM(simdInfo);
}
// Placeholder: emits no instructions and only marks its parameters as unused.
void vectorBitmask(SIMDInfo simdInfo, FPRegisterID vec, RegisterID dest)
{
    UNUSED_PARAM(dest);
    UNUSED_PARAM(vec);
    UNUSED_PARAM(simdInfo);
}
// Placeholder: emits no instructions and only marks its parameters as unused.
void vectorExtaddPairwise(SIMDInfo simdInfo, FPRegisterID vec, FPRegisterID dest)
{
    UNUSED_PARAM(dest);
    UNUSED_PARAM(vec);
    UNUSED_PARAM(simdInfo);
}
// Emits code for a lane-wise rounding average of `a` and `b` into `dest`.
//
// Only i8x16 and i16x8 lanes are handled; any other lane hits
// RELEASE_ASSERT_NOT_REACHED. simdInfo.signMode is not consulted. With AVX the
// three-operand instruction is emitted. Otherwise `a` is copied into `dest`
// (when they differ) and the two-operand SSE instruction is applied to `dest`.
//
// NOTE(review): on the non-AVX path, if `b` is the same register as `dest`
// while `a` is not, the copy overwrites `b` before it is read. Confirm that
// callers never pass such a register assignment.
void vectorAvgRound(SIMDInfo simdInfo, FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
    switch (simdInfo.lane) {
    case SIMDLane::i8x16:
        if (supportsAVX())
            m_assembler.vpavgb_rr(b, a, dest);
        else {
            if (a != dest)
                m_assembler.movapd_rr(a, dest);
            m_assembler.pavgb_rr(b, dest);
        }
        return;
    case SIMDLane::i16x8:
        if (supportsAVX())
            m_assembler.vpavgw_rr(b, a, dest);
        else {
            if (a != dest)
                m_assembler.movapd_rr(a, dest);
            m_assembler.pavgw_rr(b, dest);
        }
        return;
    default:
        RELEASE_ASSERT_NOT_REACHED();
    }
}

// Emits code for a saturating 16-bit multiply of `a` and `b` into `dest`,
// following https://github.com/WebAssembly/simd/pull/365.
//
// Only the AVX path is implemented; without AVX this hits
// RELEASE_ASSERT_NOT_REACHED. `scratchGPR` and `scratchFPR` are overwritten.
//
// NOTE(review): `scratchFPR` is written after `dest` has been produced, so it
// presumably must not be the same register as `dest` — confirm with callers.
void vectorMulSat(FPRegisterID a, FPRegisterID b, FPRegisterID dest, RegisterID scratchGPR, FPRegisterID scratchFPR)
{
    // https://github.com/WebAssembly/simd/pull/365
    if (supportsAVX()) {
        m_assembler.vpmulhrsw_rr(b, a, dest);
        // Splat 0x8000 across the 16-bit lanes, compare it for equality with
        // the product, and XOR the resulting mask back into the product.
        m_assembler.movq_i64r(0x8000, scratchGPR);
        vectorSplat(SIMDLane::i16x8, scratchGPR, scratchFPR);
        m_assembler.vpcmpeqw_rr(scratchFPR, dest, scratchFPR);
        m_assembler.vpxor_rr(scratchFPR, dest, dest);
    } else if (supportsSupplementalSSE3()) {
        // FIXME: SSSE3
        RELEASE_ASSERT_NOT_REACHED();
    } else {
        // FIXME: SSE2
        RELEASE_ASSERT_NOT_REACHED();
    }
}
// Placeholder: emits no instructions and only marks its named parameters as
// unused. The fourth parameter is unnamed and ignored.
void vectorDotProductInt32(FPRegisterID a, FPRegisterID b, FPRegisterID dest, FPRegisterID)
{
    UNUSED_PARAM(dest);
    UNUSED_PARAM(b);
    UNUSED_PARAM(a);
}

void vectorSwizzle(FPRegisterID a, FPRegisterID b, FPRegisterID dest)

0 comments on commit 5a0f6d4

Please sign in to comment.