Skip to content

Commit 155740a

Browse files
committed
[WebAssembly SIMD] Support integer and floating-point arithmetic on Intel
https://bugs.webkit.org/show_bug.cgi?id=248549

Reviewed by Justin Michaud.

Adds support for basic SIMD integer arithmetic and floating-point arithmetic instructions on Intel.

* Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::vectorAdd):
(JSC::MacroAssemblerX86_64::vectorSub):
(JSC::MacroAssemblerX86_64::vectorMul):
(JSC::MacroAssemblerX86_64::vectorDiv):
(JSC::MacroAssemblerX86_64::vectorCeil):
(JSC::MacroAssemblerX86_64::vectorFloor):
(JSC::MacroAssemblerX86_64::vectorTrunc):
(JSC::MacroAssemblerX86_64::vectorNearest):
(JSC::MacroAssemblerX86_64::vectorDotProductInt32):
(JSC::MacroAssemblerX86_64::vectorNeg): Deleted.
* Source/JavaScriptCore/assembler/X86Assembler.h:
(JSC::X86Assembler::vectorFadd):
(JSC::X86Assembler::vectorAdd):
(JSC::X86Assembler::vectorFsub):
(JSC::X86Assembler::vectorSub):
(JSC::X86Assembler::vectorFmul):
(JSC::X86Assembler::vectorMul):
(JSC::X86Assembler::vectorFdiv):
(JSC::X86Assembler::vroundps_rr):
(JSC::X86Assembler::vroundpd_rr):
(JSC::X86Assembler::vpmaddwd_rrr):
(JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
* Source/JavaScriptCore/b3/air/AirOpcode.opcodes:
* Source/JavaScriptCore/wasm/WasmAirIRGenerator.cpp:
(JSC::Wasm::AirIRGenerator::addSIMDV_V):

Canonical link: https://commits.webkit.org/257511@main
1 parent 4a8000a commit 155740a

File tree

7 files changed

+302
-48
lines changed

7 files changed

+302
-48
lines changed

Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,11 +817,15 @@ void MacroAssemblerX86Common::collectCPUFeatures()
817817
size_t valSize = sizeof(val);
818818
int rc = sysctlbyname("hw.optional.bmi1", &val, &valSize, nullptr, 0);
819819
s_bmi1CheckState = (rc >= 0 && val) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
820+
821+
rc = sysctlbyname("hw.optional.avx2_0", &val, &valSize, nullptr, 0);
822+
s_avx2CheckState = (rc >= 0 && val) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
820823
}
821824
#else
822825
{
823826
CPUID cpuid = getCPUID(0x7);
824827
s_bmi1CheckState = (cpuid[2] & (1 << 3)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
828+
s_avx2CheckState = (cpuid[2] & (1 << 5)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
825829
}
826830
#endif
827831
{
@@ -836,6 +840,7 @@ MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_supplemental
836840
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_1CheckState = CPUIDCheckState::NotChecked;
837841
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_2CheckState = CPUIDCheckState::NotChecked;
838842
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avxCheckState = CPUIDCheckState::NotChecked;
843+
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avx2CheckState = CPUIDCheckState::NotChecked;
839844
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_lzcntCheckState = CPUIDCheckState::NotChecked;
840845
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_bmi1CheckState = CPUIDCheckState::NotChecked;
841846
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_popcntCheckState = CPUIDCheckState::NotChecked;

Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4013,6 +4013,20 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<Assembler> {
40134013
return false;
40144014
}
40154015

4016+
// Returns true when s_avxCheckState is CPUIDCheckState::Set.
// If the state is still NotChecked, collectCPUFeatures() is invoked first so
// the answer is based on a populated check state.
// The SIMD emitters in MacroAssemblerX86_64 shown in this commit use this
// predicate both to choose between v-prefixed and legacy emitters and as the
// condition of their RELEASE_ASSERTs.
static bool supportsAVXForSIMD()
4017+
{
4018+
// Run feature detection only when no result has been recorded yet.
if (s_avxCheckState == CPUIDCheckState::NotChecked)
4019+
collectCPUFeatures();
4020+
return s_avxCheckState == CPUIDCheckState::Set;
4021+
}
4022+
4023+
// Returns true when s_avx2CheckState is CPUIDCheckState::Set.
// s_avx2CheckState starts as NotChecked and is assigned Set or Clear inside
// collectCPUFeatures() (from the "hw.optional.avx2_0" sysctl on one build
// path, from getCPUID(0x7) on the other), so detection is triggered here when
// it has not happened yet.
static bool supportsAVX2()
4024+
{
4025+
// Run feature detection only when no result has been recorded yet.
if (s_avx2CheckState == CPUIDCheckState::NotChecked)
4026+
collectCPUFeatures();
4027+
return s_avx2CheckState == CPUIDCheckState::Set;
4028+
}
4029+
40164030
void lfence()
40174031
{
40184032
m_assembler.lfence();
@@ -4300,6 +4314,7 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<Assembler> {
43004314
JS_EXPORT_PRIVATE static CPUIDCheckState s_sse4_1CheckState;
43014315
JS_EXPORT_PRIVATE static CPUIDCheckState s_sse4_2CheckState;
43024316
JS_EXPORT_PRIVATE static CPUIDCheckState s_avxCheckState;
4317+
JS_EXPORT_PRIVATE static CPUIDCheckState s_avx2CheckState;
43034318
JS_EXPORT_PRIVATE static CPUIDCheckState s_lzcntCheckState;
43044319
JS_EXPORT_PRIVATE static CPUIDCheckState s_bmi1CheckState;
43054320
JS_EXPORT_PRIVATE static CPUIDCheckState s_popcntCheckState;

Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h

Lines changed: 114 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2180,25 +2180,25 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
21802180
{
21812181
switch (simdLane) {
21822182
case SIMDLane::i8x16:
2183-
if (supportsAVX())
2183+
if (supportsAVXForSIMD())
21842184
m_assembler.vpextrb_rr(lane.m_value, src, dest);
21852185
else
21862186
m_assembler.pextrb_rr(lane.m_value, src, dest);
21872187
break;
21882188
case SIMDLane::i16x8:
2189-
if (supportsAVX())
2189+
if (supportsAVXForSIMD())
21902190
m_assembler.vpextrw_rr(lane.m_value, src, dest);
21912191
else
21922192
m_assembler.pextrw_rr(lane.m_value, src, dest);
21932193
break;
21942194
case SIMDLane::i32x4:
2195-
if (supportsAVX())
2195+
if (supportsAVXForSIMD())
21962196
m_assembler.vpextrd_rr(lane.m_value, src, dest);
21972197
else
21982198
m_assembler.pextrd_rr(lane.m_value, src, dest);
21992199
break;
22002200
case SIMDLane::i64x2:
2201-
if (supportsAVX())
2201+
if (supportsAVXForSIMD())
22022202
m_assembler.vpextrq_rr(lane.m_value, src, dest);
22032203
else
22042204
m_assembler.pextrq_rr(lane.m_value, src, dest);
@@ -2320,35 +2320,99 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
23202320

23212321
// Emits a vector add of `left` and `right` into `dest`, picking the
// m_assembler emitter from simdInfo.lane: vaddps/vaddpd for f32x4/f64x2 and
// vpaddb/vpaddw/vpaddd/vpaddq for i8x16/i16x8/i32x4/i64x2.
// Reading this diff view: a number followed by '-' precedes a line removed
// from the old placeholder body; a number followed by '+' precedes a line of
// the new implementation.
void vectorAdd(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
23222322
{
2323-
if (scalarTypeIsFloatingPoint(simdInfo.lane))
2324-
UNUSED_PARAM(left);
2325-
else
2326-
UNUSED_PARAM(left);
2327-
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest);
2323+
// Every emitter used below is a v-prefixed three-register form and no
// alternative path is provided, so AVX support is asserted up front.
RELEASE_ASSERT(supportsAVXForSIMD());
2324+
2325+
switch (simdInfo.lane) {
2326+
case SIMDLane::f32x4:
2327+
m_assembler.vaddps_rrr(left, right, dest);
2328+
break;
2329+
case SIMDLane::f64x2:
2330+
m_assembler.vaddpd_rrr(left, right, dest);
2331+
break;
2332+
case SIMDLane::i8x16:
2333+
m_assembler.vpaddb_rrr(left, right, dest);
2334+
break;
2335+
case SIMDLane::i16x8:
2336+
m_assembler.vpaddw_rrr(left, right, dest);
2337+
break;
2338+
case SIMDLane::i32x4:
2339+
m_assembler.vpaddd_rrr(left, right, dest);
2340+
break;
2341+
case SIMDLane::i64x2:
2342+
m_assembler.vpaddq_rrr(left, right, dest);
2343+
break;
2344+
// Any lane value not listed above reaches the not-reached release assertion.
default:
2345+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector add.");
2346+
}
23282347
}
23292348

23302349
// Emits a vector subtract into `dest`, picking the m_assembler emitter from
// simdInfo.lane: vsubps/vsubpd for f32x4/f64x2 and vpsubb/vpsubw/vpsubd/vpsubq
// for i8x16/i16x8/i32x4/i64x2. Operands are forwarded in the order
// (left, right, dest).
// Reading this diff view: a number followed by '-' precedes a line removed
// from the old placeholder body; a number followed by '+' precedes a line of
// the new implementation.
void vectorSub(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
23312350
{
2332-
if (scalarTypeIsFloatingPoint(simdInfo.lane))
2333-
UNUSED_PARAM(left);
2334-
else
2335-
UNUSED_PARAM(left);
2336-
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest);
2351+
// Every emitter used below is a v-prefixed three-register form and no
// alternative path is provided, so AVX support is asserted up front.
RELEASE_ASSERT(supportsAVXForSIMD());
2352+
2353+
switch (simdInfo.lane) {
2354+
case SIMDLane::f32x4:
2355+
m_assembler.vsubps_rrr(left, right, dest);
2356+
break;
2357+
case SIMDLane::f64x2:
2358+
m_assembler.vsubpd_rrr(left, right, dest);
2359+
break;
2360+
case SIMDLane::i8x16:
2361+
m_assembler.vpsubb_rrr(left, right, dest);
2362+
break;
2363+
case SIMDLane::i16x8:
2364+
m_assembler.vpsubw_rrr(left, right, dest);
2365+
break;
2366+
case SIMDLane::i32x4:
2367+
m_assembler.vpsubd_rrr(left, right, dest);
2368+
break;
2369+
case SIMDLane::i64x2:
2370+
m_assembler.vpsubq_rrr(left, right, dest);
2371+
break;
2372+
// Any lane value not listed above reaches the not-reached release assertion.
default:
2373+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector subtract.");
2374+
}
23372375
}
23382376

23392377
// Emits a vector multiply of `left` and `right` into `dest`, picking the
// m_assembler emitter from simdInfo.lane: vmulps/vmulpd for f32x4/f64x2,
// vpmullw for i16x8 and vpmulld for i32x4.
// Lanes without an emitter: i64x2 has an explicit case that only asserts, and
// i8x16 has no case at all, so it lands in the default assertion.
// Reading this diff view: a number followed by '-' precedes a line removed
// from the old placeholder body; a number followed by '+' precedes a line of
// the new implementation.
void vectorMul(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
23402378
{
2341-
if (scalarTypeIsFloatingPoint(simdInfo.lane))
2342-
UNUSED_PARAM(left);
2343-
else
2344-
UNUSED_PARAM(left);
2345-
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest);
2379+
// Every emitter used below is a v-prefixed three-register form and no
// alternative path is provided, so AVX support is asserted up front.
RELEASE_ASSERT(supportsAVXForSIMD());
2380+
2381+
switch (simdInfo.lane) {
2382+
case SIMDLane::f32x4:
2383+
m_assembler.vmulps_rrr(left, right, dest);
2384+
break;
2385+
case SIMDLane::f64x2:
2386+
m_assembler.vmulpd_rrr(left, right, dest);
2387+
break;
2388+
case SIMDLane::i16x8:
2389+
m_assembler.vpmullw_rrr(left, right, dest);
2390+
break;
2391+
case SIMDLane::i32x4:
2392+
m_assembler.vpmulld_rrr(left, right, dest);
2393+
break;
2394+
// Per the assertion message, callers are expected to have lowered i64x2
// multiplies to other operations before they get here.
case SIMDLane::i64x2:
2395+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("i64x2 multiply is not supported on Intel without AVX-512. This instruction should have been lowered before reaching the assembler.");
2396+
break;
2397+
// Any remaining lane value (including i8x16) reaches the not-reached
// release assertion.
default:
2398+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector multiply.");
2399+
}
23462400
}
23472401

23482402
// Emits a floating-point vector divide into `dest`: vdivps for f32x4, vdivpd
// for f64x2, with operands forwarded in the order (left, right, dest).
// Integer lanes are rejected: first by the ASSERT on
// scalarTypeIsFloatingPoint(), then by the default case of the switch.
// Reading this diff view: a number followed by '-' precedes a line removed
// from the old placeholder body; a number followed by '+' precedes a line of
// the new implementation.
void vectorDiv(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
23492403
{
2404+
// Both emitters used below are v-prefixed three-register forms and no
// alternative path is provided, so AVX support is asserted up front.
RELEASE_ASSERT(supportsAVXForSIMD());
23502405
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
2351-
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
2406+
switch (simdInfo.lane) {
2407+
case SIMDLane::f32x4:
2408+
m_assembler.vdivps_rrr(left, right, dest);
2409+
break;
2410+
case SIMDLane::f64x2:
2411+
m_assembler.vdivpd_rrr(left, right, dest);
2412+
break;
2413+
// Any lane value other than f32x4/f64x2 reaches the not-reached release
// assertion.
default:
2414+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Invalid SIMD lane for vector divide.");
2415+
}
23522416
}
23532417

23542418
void vectorMax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
@@ -2576,37 +2640,42 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
25762640
}
25772641
}
25782642

2579-
void vectorNeg(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
2580-
{
2581-
if (scalarTypeIsFloatingPoint(simdInfo.lane))
2582-
UNUSED_PARAM(dest);
2583-
else
2584-
UNUSED_PARAM(dest);
2585-
UNUSED_PARAM(input);
2586-
}
2587-
25882643
// Placeholder at this commit: asserts that the lane is i8x16 and otherwise
// only marks its parameters as unused. No m_assembler call is made, so
// nothing is emitted for `input`/`dest`.
void vectorPopcnt(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
25892644
{
25902645
ASSERT(simdInfo.lane == SIMDLane::i8x16);
25912646
UNUSED_PARAM(input); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
25922647
}
25932648

2649+
// Local shorthand for X86Assembler::RoundingType, the rounding-mode argument
// passed to vroundps_rr/vroundpd_rr by vectorCeil, vectorFloor, vectorTrunc
// and vectorNearest.
using RoundingType = X86Assembler::RoundingType;
2650+
25942651
// Emits a vector round of `input` into `dest` using
// RoundingType::TowardInfiniti: vroundps_rr for f32x4, vroundpd_rr otherwise.
// The only lane check besides f32x4 is the ASSERT on
// scalarTypeIsFloatingPoint(); any lane that is not f32x4 takes the
// vroundpd path.
// NOTE(review): "TowardInfiniti" is the identifier as referenced here;
// presumably it matches the enumerator's spelling in X86Assembler — verify
// before renaming.
// In this diff view the line following a '-' gutter marker is the removed
// placeholder body.
void vectorCeil(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
25952652
{
2653+
// Only v-prefixed emitters are used below, with no alternative path.
RELEASE_ASSERT(supportsAVXForSIMD());
25962654
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
2597-
UNUSED_PARAM(input); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
2655+
if (simdInfo.lane == SIMDLane::f32x4)
2656+
m_assembler.vroundps_rr(input, dest, RoundingType::TowardInfiniti);
2657+
else
2658+
m_assembler.vroundpd_rr(input, dest, RoundingType::TowardInfiniti);
25982659
}
25992660

26002661
// Emits a vector round of `input` into `dest` using
// RoundingType::TowardNegativeInfiniti: vroundps_rr for f32x4, vroundpd_rr
// otherwise.
// The only lane check besides f32x4 is the ASSERT on
// scalarTypeIsFloatingPoint(); any lane that is not f32x4 takes the
// vroundpd path.
// In this diff view the line following a '-' gutter marker is the removed
// placeholder body.
void vectorFloor(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
26012662
{
2663+
// Only v-prefixed emitters are used below, with no alternative path.
RELEASE_ASSERT(supportsAVXForSIMD());
26022664
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
2603-
UNUSED_PARAM(input); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
2665+
if (simdInfo.lane == SIMDLane::f32x4)
2666+
m_assembler.vroundps_rr(input, dest, RoundingType::TowardNegativeInfiniti);
2667+
else
2668+
m_assembler.vroundpd_rr(input, dest, RoundingType::TowardNegativeInfiniti);
26042669
}
26052670

26062671
// Emits a vector round of `input` into `dest` using RoundingType::TowardZero:
// vroundps_rr for f32x4, vroundpd_rr otherwise.
// The only lane check besides f32x4 is the ASSERT on
// scalarTypeIsFloatingPoint(); any lane that is not f32x4 takes the
// vroundpd path.
// In this diff view the line following a '-' gutter marker is the removed
// placeholder body.
void vectorTrunc(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
26072672
{
2673+
// Only v-prefixed emitters are used below, with no alternative path.
RELEASE_ASSERT(supportsAVXForSIMD());
26082674
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
2609-
UNUSED_PARAM(input); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
2675+
if (simdInfo.lane == SIMDLane::f32x4)
2676+
m_assembler.vroundps_rr(input, dest, RoundingType::TowardZero);
2677+
else
2678+
m_assembler.vroundpd_rr(input, dest, RoundingType::TowardZero);
26102679
}
26112680

26122681
void vectorTruncSat(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
@@ -2620,8 +2689,12 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
26202689

26212690
// Emits a vector round of `input` into `dest` using
// RoundingType::ToNearestWithTiesToEven: vroundps_rr for f32x4, vroundpd_rr
// otherwise.
// The only lane check besides f32x4 is the ASSERT on
// scalarTypeIsFloatingPoint(); any lane that is not f32x4 takes the
// vroundpd path.
// In this diff view the line following a '-' gutter marker is the removed
// placeholder body.
void vectorNearest(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
26222691
{
2692+
// Only v-prefixed emitters are used below, with no alternative path.
RELEASE_ASSERT(supportsAVXForSIMD());
26232693
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
2624-
UNUSED_PARAM(input); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
2694+
if (simdInfo.lane == SIMDLane::f32x4)
2695+
m_assembler.vroundps_rr(input, dest, RoundingType::ToNearestWithTiesToEven);
2696+
else
2697+
m_assembler.vroundpd_rr(input, dest, RoundingType::ToNearestWithTiesToEven);
26252698
}
26262699

26272700
void vectorSqrt(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
@@ -2881,11 +2954,10 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
28812954
}
28822955

28832956
}
2884-
void vectorDotProductInt32(FPRegisterID a, FPRegisterID b, FPRegisterID dest, FPRegisterID) { UNUSED_PARAM(a); UNUSED_PARAM(b); UNUSED_PARAM(dest); }
28852957

28862958
void vectorSwizzle(FPRegisterID a, FPRegisterID b, FPRegisterID dest)
28872959
{
2888-
if (supportsAVX())
2960+
if (supportsAVXForSIMD())
28892961
m_assembler.vpshufb_rr(b, a, dest);
28902962
else {
28912963
if (a != dest)
@@ -2894,7 +2966,13 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
28942966
}
28952967
}
28962968

2897-
void vectorShuffle(TrustedImm64 immLow, TrustedImm64 immHigh, FPRegisterID a, FPRegisterID b, FPRegisterID dest) { UNUSED_PARAM(immLow); UNUSED_PARAM(immHigh); UNUSED_PARAM(a); UNUSED_PARAM(b); UNUSED_PARAM(dest); }
2969+
// Emits m_assembler.vpmaddwd_rrr(a, b, dest) after asserting AVX support.
// Signature note: the one-line stub removed by this commit took a fourth,
// unnamed FPRegisterID parameter; this version takes exactly three registers.
void vectorDotProductInt32(FPRegisterID a, FPRegisterID b, FPRegisterID dest)
2970+
{
2971+
// The single emitter below is v-prefixed and there is no alternative path.
RELEASE_ASSERT(supportsAVXForSIMD());
2972+
m_assembler.vpmaddwd_rrr(a, b, dest);
2973+
}
2974+
2975+
// Placeholder at this commit: each parameter is cast to void and no
// m_assembler call is made, so nothing is emitted.
// NOTE(review): neighbouring stubs silence unused parameters with
// UNUSED_PARAM(); this one uses (void) casts instead.
void vectorShuffle(TrustedImm64 immLow, TrustedImm64 immHigh, FPRegisterID a, FPRegisterID b, FPRegisterID dest) { (void) immLow; (void) immHigh; (void) a; (void) b; (void) dest; }
28982976

28992977
// Misc helper functions.
29002978

0 commit comments

Comments
 (0)