Skip to content
Permalink
Browse files
[WebAssembly SIMD] Support integer and floating-point arithmetic on Intel

https://bugs.webkit.org/show_bug.cgi?id=248549

Reviewed by Justin Michaud.

Adds support for basic SIMD integer arithmetic and floating-point arithmetic
instructions on Intel.

* Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::vectorAdd):
(JSC::MacroAssemblerX86_64::vectorSub):
(JSC::MacroAssemblerX86_64::vectorMul):
(JSC::MacroAssemblerX86_64::vectorDiv):
(JSC::MacroAssemblerX86_64::vectorCeil):
(JSC::MacroAssemblerX86_64::vectorFloor):
(JSC::MacroAssemblerX86_64::vectorTrunc):
(JSC::MacroAssemblerX86_64::vectorNearest):
(JSC::MacroAssemblerX86_64::vectorDotProductInt32):
(JSC::MacroAssemblerX86_64::vectorNeg): Deleted.
* Source/JavaScriptCore/assembler/X86Assembler.h:
(JSC::X86Assembler::vectorFadd):
(JSC::X86Assembler::vectorAdd):
(JSC::X86Assembler::vectorFsub):
(JSC::X86Assembler::vectorSub):
(JSC::X86Assembler::vectorFmul):
(JSC::X86Assembler::vectorMul):
(JSC::X86Assembler::vectorFdiv):
(JSC::X86Assembler::vroundps_rr):
(JSC::X86Assembler::vroundpd_rr):
(JSC::X86Assembler::vpmaddwd_rrr):
(JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
* Source/JavaScriptCore/b3/air/AirOpcode.opcodes:
* Source/JavaScriptCore/wasm/WasmAirIRGenerator.cpp:
(JSC::Wasm::AirIRGenerator::addSIMDV_V):

Canonical link: https://commits.webkit.org/257511@main
  • Loading branch information
David Degazio committed Dec 7, 2022
1 parent 4a8000a commit 155740a50205d47608751bd4745d562202bca427
Show file tree
Hide file tree
Showing 7 changed files with 302 additions and 48 deletions.
@@ -817,11 +817,15 @@ void MacroAssemblerX86Common::collectCPUFeatures()
size_t valSize = sizeof(val);
int rc = sysctlbyname("hw.optional.bmi1", &val, &valSize, nullptr, 0);
s_bmi1CheckState = (rc >= 0 && val) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;

rc = sysctlbyname("hw.optional.avx2_0", &val, &valSize, nullptr, 0);
s_avx2CheckState = (rc >= 0 && val) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
}
#else
{
CPUID cpuid = getCPUID(0x7);
s_bmi1CheckState = (cpuid[2] & (1 << 3)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
s_avx2CheckState = (cpuid[2] & (1 << 5)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
}
#endif
{
@@ -836,6 +840,7 @@ MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_supplemental
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_1CheckState = CPUIDCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_2CheckState = CPUIDCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avxCheckState = CPUIDCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avx2CheckState = CPUIDCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_lzcntCheckState = CPUIDCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_bmi1CheckState = CPUIDCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_popcntCheckState = CPUIDCheckState::NotChecked;
@@ -4013,6 +4013,20 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<Assembler> {
return false;
}

// Returns true when the cached AVX check state is Set.
// If the state has not been checked yet, collectCPUFeatures() is run first;
// it is expected to populate s_avxCheckState (that assignment is outside this view).
static bool supportsAVXForSIMD()
{
    const bool needsProbe = (s_avxCheckState == CPUIDCheckState::NotChecked);
    if (needsProbe)
        collectCPUFeatures();

    return s_avxCheckState == CPUIDCheckState::Set;
}

// Returns true when the cached AVX2 check state is Set.
// On first use (state still NotChecked) this triggers collectCPUFeatures(),
// which assigns s_avx2CheckState from the platform's feature query.
static bool supportsAVX2()
{
    const bool needsProbe = (s_avx2CheckState == CPUIDCheckState::NotChecked);
    if (needsProbe)
        collectCPUFeatures();

    return s_avx2CheckState == CPUIDCheckState::Set;
}

void lfence()
{
m_assembler.lfence();
@@ -4300,6 +4314,7 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<Assembler> {
JS_EXPORT_PRIVATE static CPUIDCheckState s_sse4_1CheckState;
JS_EXPORT_PRIVATE static CPUIDCheckState s_sse4_2CheckState;
JS_EXPORT_PRIVATE static CPUIDCheckState s_avxCheckState;
JS_EXPORT_PRIVATE static CPUIDCheckState s_avx2CheckState;
JS_EXPORT_PRIVATE static CPUIDCheckState s_lzcntCheckState;
JS_EXPORT_PRIVATE static CPUIDCheckState s_bmi1CheckState;
JS_EXPORT_PRIVATE static CPUIDCheckState s_popcntCheckState;
@@ -2180,25 +2180,25 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
{
switch (simdLane) {
case SIMDLane::i8x16:
if (supportsAVX())
if (supportsAVXForSIMD())
m_assembler.vpextrb_rr(lane.m_value, src, dest);
else
m_assembler.pextrb_rr(lane.m_value, src, dest);
break;
case SIMDLane::i16x8:
if (supportsAVX())
if (supportsAVXForSIMD())
m_assembler.vpextrw_rr(lane.m_value, src, dest);
else
m_assembler.pextrw_rr(lane.m_value, src, dest);
break;
case SIMDLane::i32x4:
if (supportsAVX())
if (supportsAVXForSIMD())
m_assembler.vpextrd_rr(lane.m_value, src, dest);
else
m_assembler.pextrd_rr(lane.m_value, src, dest);
break;
case SIMDLane::i64x2:
if (supportsAVX())
if (supportsAVXForSIMD())
m_assembler.vpextrq_rr(lane.m_value, src, dest);
else
m_assembler.pextrq_rr(lane.m_value, src, dest);
@@ -2320,35 +2320,99 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

// Emits a packed vector add of `left` and `right` into `dest`.
// simdInfo.lane selects the AVX-encoded instruction: vaddps / vaddpd for the
// floating-point lanes and vpaddb / vpaddw / vpaddd / vpaddq for the integer lanes.
// AVX support is release-asserted; any other lane value is a fatal error.
void vectorAdd(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    // Every parameter is consumed below, so the former UNUSED_PARAM placeholders
    // (including an if/else whose two branches were identical no-ops) are gone.
    RELEASE_ASSERT(supportsAVXForSIMD());

    switch (simdInfo.lane) {
    case SIMDLane::f32x4:
        m_assembler.vaddps_rrr(left, right, dest);
        break;
    case SIMDLane::f64x2:
        m_assembler.vaddpd_rrr(left, right, dest);
        break;
    case SIMDLane::i8x16:
        m_assembler.vpaddb_rrr(left, right, dest);
        break;
    case SIMDLane::i16x8:
        m_assembler.vpaddw_rrr(left, right, dest);
        break;
    case SIMDLane::i32x4:
        m_assembler.vpaddd_rrr(left, right, dest);
        break;
    case SIMDLane::i64x2:
        m_assembler.vpaddq_rrr(left, right, dest);
        break;
    default:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector add.");
    }
}

// Emits a packed vector subtract (`left` - `right`) into `dest`.
// simdInfo.lane selects the AVX-encoded instruction: vsubps / vsubpd for the
// floating-point lanes and vpsubb / vpsubw / vpsubd / vpsubq for the integer lanes.
// AVX support is release-asserted; any other lane value is a fatal error.
void vectorSub(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    // Every parameter is consumed below, so the former UNUSED_PARAM placeholders
    // (including an if/else whose two branches were identical no-ops) are gone.
    RELEASE_ASSERT(supportsAVXForSIMD());

    switch (simdInfo.lane) {
    case SIMDLane::f32x4:
        m_assembler.vsubps_rrr(left, right, dest);
        break;
    case SIMDLane::f64x2:
        m_assembler.vsubpd_rrr(left, right, dest);
        break;
    case SIMDLane::i8x16:
        m_assembler.vpsubb_rrr(left, right, dest);
        break;
    case SIMDLane::i16x8:
        m_assembler.vpsubw_rrr(left, right, dest);
        break;
    case SIMDLane::i32x4:
        m_assembler.vpsubd_rrr(left, right, dest);
        break;
    case SIMDLane::i64x2:
        m_assembler.vpsubq_rrr(left, right, dest);
        break;
    default:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector subtract.");
    }
}

// Emits a packed vector multiply of `left` and `right` into `dest`.
// Handled lanes: f32x4 (vmulps), f64x2 (vmulpd), i16x8 (vpmullw), i32x4 (vpmulld).
// i64x2 is rejected with a dedicated message because it must be lowered before
// reaching the assembler; i8x16 has no case here and hits the generic
// invalid-lane assertion. AVX support is release-asserted.
void vectorMul(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    // Every parameter is consumed below, so the former UNUSED_PARAM placeholders
    // (including an if/else whose two branches were identical no-ops) are gone.
    RELEASE_ASSERT(supportsAVXForSIMD());

    switch (simdInfo.lane) {
    case SIMDLane::f32x4:
        m_assembler.vmulps_rrr(left, right, dest);
        break;
    case SIMDLane::f64x2:
        m_assembler.vmulpd_rrr(left, right, dest);
        break;
    case SIMDLane::i16x8:
        m_assembler.vpmullw_rrr(left, right, dest);
        break;
    case SIMDLane::i32x4:
        m_assembler.vpmulld_rrr(left, right, dest);
        break;
    case SIMDLane::i64x2:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("i64x2 multiply is not supported on Intel without AVX-512. This instruction should have been lowered before reaching the assembler.");
        break;
    default:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector multiply.");
    }
}

// Emits a packed floating-point divide (`left` / `right`) into `dest`.
// Only f32x4 (vdivps) and f64x2 (vdivpd) are accepted; any other lane is a
// fatal error. AVX support is release-asserted.
void vectorDiv(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
{
    RELEASE_ASSERT(supportsAVXForSIMD());
    ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));

    // All four parameters are used by the switch, so the stale UNUSED_PARAM
    // line from the earlier stub has been removed.
    switch (simdInfo.lane) {
    case SIMDLane::f32x4:
        m_assembler.vdivps_rrr(left, right, dest);
        break;
    case SIMDLane::f64x2:
        m_assembler.vdivpd_rrr(left, right, dest);
        break;
    default:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector divide.");
    }
}

void vectorMax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
@@ -2576,37 +2640,42 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
}
}

// Placeholder: no call into m_assembler is made here, so nothing is emitted.
// Both branches of the conditional are the same UNUSED_PARAM no-op.
// NOTE(review): the commit message lists MacroAssemblerX86_64::vectorNeg as
// "Deleted", so this looks like the removed side of the diff — confirm before relying on it.
void vectorNeg(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
if (scalarTypeIsFloatingPoint(simdInfo.lane))
UNUSED_PARAM(dest);
else
UNUSED_PARAM(dest);
UNUSED_PARAM(input);
}

// Placeholder for vector popcount: asserts that the lane is i8x16 and otherwise
// ignores its operands. No call into m_assembler is made in this version.
void vectorPopcnt(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
    ASSERT(simdInfo.lane == SIMDLane::i8x16);

    UNUSED_PARAM(simdInfo);
    UNUSED_PARAM(input);
    UNUSED_PARAM(dest);
}

using RoundingType = X86Assembler::RoundingType;

// Emits a packed floating-point round of `input` into `dest` using
// RoundingType::TowardInfiniti (ceil): vroundps for f32x4, vroundpd for f64x2.
// AVX support is release-asserted.
void vectorCeil(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
    RELEASE_ASSERT(supportsAVXForSIMD());
    ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));

    // Explicit switch, mirroring vectorDiv: the previous if/else sent every
    // non-f32x4 lane to the f64x2 encoding, guarded only by ASSERT (presumably
    // debug-only, given the file's separate RELEASE_ASSERT). An unexpected lane
    // now fails loudly instead of emitting the wrong instruction.
    switch (simdInfo.lane) {
    case SIMDLane::f32x4:
        m_assembler.vroundps_rr(input, dest, RoundingType::TowardInfiniti);
        break;
    case SIMDLane::f64x2:
        m_assembler.vroundpd_rr(input, dest, RoundingType::TowardInfiniti);
        break;
    default:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector ceil.");
    }
}

// Emits a packed floating-point round of `input` into `dest` using
// RoundingType::TowardNegativeInfiniti (floor): vroundps for f32x4, vroundpd for f64x2.
// AVX support is release-asserted.
void vectorFloor(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
    RELEASE_ASSERT(supportsAVXForSIMD());
    ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));

    // Explicit switch, mirroring vectorDiv: the previous if/else sent every
    // non-f32x4 lane to the f64x2 encoding, guarded only by ASSERT (presumably
    // debug-only, given the file's separate RELEASE_ASSERT). An unexpected lane
    // now fails loudly instead of emitting the wrong instruction.
    switch (simdInfo.lane) {
    case SIMDLane::f32x4:
        m_assembler.vroundps_rr(input, dest, RoundingType::TowardNegativeInfiniti);
        break;
    case SIMDLane::f64x2:
        m_assembler.vroundpd_rr(input, dest, RoundingType::TowardNegativeInfiniti);
        break;
    default:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector floor.");
    }
}

// Emits a packed floating-point round of `input` into `dest` using
// RoundingType::TowardZero (truncate): vroundps for f32x4, vroundpd for f64x2.
// AVX support is release-asserted.
void vectorTrunc(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
    RELEASE_ASSERT(supportsAVXForSIMD());
    ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));

    // Explicit switch, mirroring vectorDiv: the previous if/else sent every
    // non-f32x4 lane to the f64x2 encoding, guarded only by ASSERT (presumably
    // debug-only, given the file's separate RELEASE_ASSERT). An unexpected lane
    // now fails loudly instead of emitting the wrong instruction.
    switch (simdInfo.lane) {
    case SIMDLane::f32x4:
        m_assembler.vroundps_rr(input, dest, RoundingType::TowardZero);
        break;
    case SIMDLane::f64x2:
        m_assembler.vroundpd_rr(input, dest, RoundingType::TowardZero);
        break;
    default:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector trunc.");
    }
}

void vectorTruncSat(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
@@ -2620,8 +2689,12 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

// Emits a packed floating-point round of `input` into `dest` using
// RoundingType::ToNearestWithTiesToEven: vroundps for f32x4, vroundpd for f64x2.
// AVX support is release-asserted.
void vectorNearest(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
    RELEASE_ASSERT(supportsAVXForSIMD());
    ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));

    // Explicit switch, mirroring vectorDiv: the previous if/else sent every
    // non-f32x4 lane to the f64x2 encoding, guarded only by ASSERT (presumably
    // debug-only, given the file's separate RELEASE_ASSERT). An unexpected lane
    // now fails loudly instead of emitting the wrong instruction.
    switch (simdInfo.lane) {
    case SIMDLane::f32x4:
        m_assembler.vroundps_rr(input, dest, RoundingType::ToNearestWithTiesToEven);
        break;
    case SIMDLane::f64x2:
        m_assembler.vroundpd_rr(input, dest, RoundingType::ToNearestWithTiesToEven);
        break;
    default:
        RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector nearest.");
    }
}

void vectorSqrt(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
@@ -2881,11 +2954,10 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
}

}
// Four-operand placeholder: ignores a, b and dest (the fourth operand is unnamed) and emits nothing.
// NOTE(review): appears to be the pre-change stub shown by the diff; a three-operand
// overload that emits vpmaddwd is defined further down — confirm which one callers use.
void vectorDotProductInt32(FPRegisterID a, FPRegisterID b, FPRegisterID dest, FPRegisterID) { UNUSED_PARAM(a); UNUSED_PARAM(b); UNUSED_PARAM(dest); }

void vectorSwizzle(FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
if (supportsAVX())
if (supportsAVXForSIMD())
m_assembler.vpshufb_rr(b, a, dest);
else {
if (a != dest)
@@ -2894,7 +2966,13 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
}
}

// Placeholder: every operand is marked unused and no instruction is emitted.
// NOTE(review): an identically-declared vectorShuffle stub appears again a few lines
// below in this diff view; this copy is presumably the removed side — confirm.
void vectorShuffle(TrustedImm64 immLow, TrustedImm64 immHigh, FPRegisterID a, FPRegisterID b, FPRegisterID dest) { UNUSED_PARAM(immLow); UNUSED_PARAM(immHigh); UNUSED_PARAM(a); UNUSED_PARAM(b); UNUSED_PARAM(dest); }
// Emits the AVX-encoded vpmaddwd instruction with `a` and `b` as sources and
// `dest` as the destination. AVX support is release-asserted before emitting.
void vectorDotProductInt32(FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
    RELEASE_ASSERT(supportsAVXForSIMD());

    m_assembler.vpmaddwd_rrr(a, b, dest);
}

// Placeholder for vector shuffle: every operand is ignored and no instruction is emitted.
// Uses UNUSED_PARAM, like the other stubs in this class, rather than bare void casts.
void vectorShuffle(TrustedImm64 immLow, TrustedImm64 immHigh, FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
    UNUSED_PARAM(immLow);
    UNUSED_PARAM(immHigh);
    UNUSED_PARAM(a);
    UNUSED_PARAM(b);
    UNUSED_PARAM(dest);
}

// Misc helper functions.

0 comments on commit 155740a

Please sign in to comment.