Skip to content
Permalink
Browse files
[SIMD] Intel support for swizzle and shuffle and fix extract_lane and replace_lane

https://bugs.webkit.org/show_bug.cgi?id=248728
rdar://102942396

Reviewed by Yusuke Suzuki.

This patch accomplishes three tasks:

    1. Add WASM SIMD operations `swizzle` and `shuffle`.
    https://github.com/WebAssembly/simd/blob/main/proposals/simd/SIMD.md#shuffling-using-immediate-indices

    2. Refactor MacroAssemblerX86_64.h and X86Assembler.h for `pinsr` and `pextr`,
    so that X86Assembler does simple instruction emission only and MacroAssemblerX86
    selects the instruction.

    3. Fix WASM SIMD operation `replace_lane`. Previously, the operation was
    implemented with the `pinsr` instruction in AVX format but was passed only three
    parameters, which is wrong.
        https://www.felixcloutier.com/x86/pinsrb:pinsrd:pinsrq
        https://www.felixcloutier.com/x86/pinsrw
        https://www.officedaytime.com/simd512e/simdimg/si.php?f=pinsrb
        https://www.officedaytime.com/simd512e/simdimg/si.php?f=pinsrw
        https://www.officedaytime.com/simd512e/simdimg/si.php?f=pinsrd
        https://www.officedaytime.com/simd512e/simdimg/si.php?f=pinsrq

* Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::vectorReplaceLane):
(JSC::MacroAssemblerX86_64::vectorExtractLane):
(JSC::MacroAssemblerX86_64::vectorSwizzle):
* Source/JavaScriptCore/assembler/X86Assembler.h:
(JSC::X86Assembler::pinsrb):
(JSC::X86Assembler::pinsrw):
(JSC::X86Assembler::pinsrd):
(JSC::X86Assembler::pinsrq):
(JSC::X86Assembler::insertps):
(JSC::X86Assembler::unpcklpd):
(JSC::X86Assembler::vpextrb):
(JSC::X86Assembler::vpextrw):
(JSC::X86Assembler::vpextrd):
(JSC::X86Assembler::vpextrq):
(JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
(JSC::X86Assembler::pinsr): Deleted.
(JSC::X86Assembler::pextr): Deleted.
(JSC::X86Assembler::vextractps): Deleted.
* Source/JavaScriptCore/b3/air/AirOpcode.opcodes:
* Source/JavaScriptCore/wasm/WasmAirIRGenerator.cpp:
(JSC::Wasm::AirIRGenerator::addReplaceLane):

Canonical link: https://commits.webkit.org/257400@main
  • Loading branch information
hyjorc1 authored and Yijia Huang committed Dec 6, 2022
1 parent 919c3b8 commit cef394ff577b74bcaebdd6d3494aaef7bf78760b
Show file tree
Hide file tree
Showing 4 changed files with 291 additions and 121 deletions.
@@ -2123,6 +2123,7 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
}

// SIMD
// FIXME: We should use either AVX or SSE exclusively (not a mix of both) due to performance concerns.

void signExtendForSIMDLane(RegisterID reg, SIMDLane simdLane)
{
@@ -2137,21 +2138,75 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {

// Replaces one integer lane of the vector register `dest` with the value held in the
// general-purpose register `src`.
//   simdLane - lane shape; only i8x16, i16x8, i32x4 and i64x2 are handled here, any
//              other value reaches RELEASE_ASSERT_NOT_REACHED().
//   lane     - immediate lane index; its m_value is forwarded unchanged to the assembler.
//   src      - general-purpose register supplying the new lane value.
//   dest     - vector register passed as the destination operand of the insert.
void vectorReplaceLane(SIMDLane simdLane, TrustedImm32 lane, RegisterID src, FPRegisterID dest)
{
// NOTE(review): the next line looks like the pre-patch call that this commit removes
// (X86Assembler::pinsr is listed as "Deleted" in the commit message); the diff's +/-
// markers appear to have been lost in extraction -- confirm before treating it as live code.
m_assembler.pinsr(simdLane, lane.m_value, src, dest);
// Select the pinsr{b,w,d,q} emitter whose element width matches the lane shape.
switch (simdLane) {
case SIMDLane::i8x16:
m_assembler.pinsrb_rr(lane.m_value, src, dest);
return;
case SIMDLane::i16x8:
m_assembler.pinsrw_rr(lane.m_value, src, dest);
return;
case SIMDLane::i32x4:
m_assembler.pinsrd_rr(lane.m_value, src, dest);
return;
case SIMDLane::i64x2:
m_assembler.pinsrq_rr(lane.m_value, src, dest);
return;
default:
// Floating-point lane shapes are handled by the FPRegisterID-source overload.
RELEASE_ASSERT_NOT_REACHED();
}
}

// Replaces one floating-point lane of the vector register `dest` with a value taken
// from the FP register `src`.
//   simdLane - lane shape; only f32x4 and f64x2 are handled here, any other value
//              reaches RELEASE_ASSERT_NOT_REACHED().
//   lane     - immediate lane index (asserted to be < 2 for f64x2).
//   src      - FP register supplying the new lane value.
//   dest     - vector register passed as the destination operand.
// NOTE(review): the first signature below (with the extra `scratch` parameter) looks like
// the pre-patch declaration removed by this commit; the diff's +/- markers appear to have
// been lost in extraction -- confirm before treating it as live code.
void vectorReplaceLane(SIMDLane simdLane, TrustedImm32 lane, FPRegisterID src, FPRegisterID dest, RegisterID scratch)
void vectorReplaceLane(SIMDLane simdLane, TrustedImm32 lane, FPRegisterID src, FPRegisterID dest)
{
// NOTE(review): the FIXME and the two statements that follow it use `scratch`, which only
// the old signature declares; they look like removed-side diff lines as well -- confirm.
// FIXME: Maybe we can use INSERTPS instead to get rid of the scratch register.
moveDoubleTo64(src, scratch);
m_assembler.pinsr(simdLane, lane.m_value, scratch, dest);
switch (simdLane) {
case SIMDLane::f32x4:
// NOTE(review): per the Intel SDM, INSERTPS encodes the destination element in imm8
// bits 5:4 (bits 3:0 are a zero mask). The raw lane index is passed here, so this
// relies on insertps_rr placing it in the right bit field -- TODO confirm in X86Assembler.h.
m_assembler.insertps_rr(lane.m_value, src, dest);
return;
case SIMDLane::f64x2:
ASSERT(lane.m_value < 2);
// Lane 1 is written with unpcklpd and lane 0 with movsd. Assuming the emitters use
// (src, dest) operand order, UNPCKLPD keeps dest's low double and puts src's low double
// in the high half, while register-to-register MOVSD overwrites only the low double.
if (lane.m_value)
m_assembler.unpcklpd_rr(src, dest);
else
m_assembler.movsd_rr(src, dest);
return;
default:
// Integer lane shapes are handled by the RegisterID-source overload.
RELEASE_ASSERT_NOT_REACHED();
}
}

// NOTE(review): DEFINE_SIMD_FUNCS is defined outside this excerpt; presumably it generates
// additional vectorReplaceLane wrappers/overloads -- TODO confirm against the macro definition.
DEFINE_SIMD_FUNCS(vectorReplaceLane);

// Extracts one integer lane of the vector register `src` into the general-purpose
// register `dest`.
//   simdLane - lane shape; only i8x16, i16x8, i32x4 and i64x2 are handled here, any
//              other value reaches RELEASE_ASSERT_NOT_REACHED().
//   signMode - when SIMDSignMode::Signed, `dest` is additionally passed through
//              signExtendForSIMDLane() after the extract.
//   lane     - immediate lane index; its m_value is forwarded unchanged to the assembler.
//   src      - vector register to read from.
//   dest     - general-purpose register receiving the lane value.
void vectorExtractLane(SIMDLane simdLane, SIMDSignMode signMode, TrustedImm32 lane, FPRegisterID src, RegisterID dest)
{
// NOTE(review): the next line looks like the pre-patch call that this commit removes
// (X86Assembler::pextr is listed as "Deleted" in the commit message); the diff's +/-
// markers appear to have been lost in extraction -- confirm before treating it as live code.
m_assembler.pextr(simdLane, lane.m_value, src, dest);
// For each lane width, use the v-prefixed emitter when supportsAVX() is true and the
// plain pextr{b,w,d,q} emitter otherwise.
switch (simdLane) {
case SIMDLane::i8x16:
if (supportsAVX())
m_assembler.vpextrb_rr(lane.m_value, src, dest);
else
m_assembler.pextrb_rr(lane.m_value, src, dest);
break;
case SIMDLane::i16x8:
if (supportsAVX())
m_assembler.vpextrw_rr(lane.m_value, src, dest);
else
m_assembler.pextrw_rr(lane.m_value, src, dest);
break;
case SIMDLane::i32x4:
if (supportsAVX())
m_assembler.vpextrd_rr(lane.m_value, src, dest);
else
m_assembler.pextrd_rr(lane.m_value, src, dest);
break;
case SIMDLane::i64x2:
if (supportsAVX())
m_assembler.vpextrq_rr(lane.m_value, src, dest);
else
m_assembler.pextrq_rr(lane.m_value, src, dest);
break;
default:
RELEASE_ASSERT_NOT_REACHED();
}

// The cases above `break` (rather than return) so that the sign extension below is
// applied uniformly to whichever extract was emitted.
if (signMode == SIMDSignMode::Signed)
signExtendForSIMDLane(dest, simdLane);
}
@@ -2585,7 +2640,18 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
// Placeholder: the body does nothing besides marking every argument as unused.
void vectorAvgRound(SIMDInfo simdInfo, FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
    UNUSED_PARAM(simdInfo);
    UNUSED_PARAM(a);
    UNUSED_PARAM(b);
    UNUSED_PARAM(dest);
}
// Placeholder: the body does nothing besides marking every argument as unused.
void vectorMulSat(FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
    UNUSED_PARAM(a);
    UNUSED_PARAM(b);
    UNUSED_PARAM(dest);
}
// Placeholder: the body does nothing besides marking the named arguments as unused.
// The fourth FPRegisterID parameter is unnamed and is not referenced.
void vectorDotProductInt32(FPRegisterID a, FPRegisterID b, FPRegisterID dest, FPRegisterID)
{
    UNUSED_PARAM(a);
    UNUSED_PARAM(b);
    UNUSED_PARAM(dest);
}
// NOTE(review): this one-line stub has the same signature as the full vectorSwizzle
// definition that follows; it looks like the pre-patch line removed by this commit, with
// the diff's +/- markers lost in extraction -- confirm before treating it as live code.
void vectorSwizzle(FPRegisterID a, FPRegisterID b, FPRegisterID dest) { UNUSED_PARAM(a); UNUSED_PARAM(b); UNUSED_PARAM(dest); }

// Emits a byte shuffle of `a` controlled by the per-byte indices in `b`, leaving the
// result in `dest`.
//   a    - vector register holding the bytes to select from.
//   b    - vector register holding the selector bytes.
//   dest - vector register receiving the result.
// With AVX the three-operand vpshufb emitter is used. Otherwise `a` is first copied into
// `dest` and the two-operand pshufb emitter shuffles `dest` in place.
// NOTE(review): per the Intel SDM, PSHUFB zeroes an output byte only when bit 7 of its
// selector is set and otherwise uses the low 4 bits, whereas Wasm i8x16.swizzle yields 0
// for every index >= 16. Presumably the caller pre-saturates the indices -- TODO confirm.
void vectorSwizzle(FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
    if (supportsAVX()) {
        m_assembler.vpshufb_rr(b, a, dest);
        return;
    }

    // The SSE sequence copies `a` into `dest` before `b` is read. If `dest` aliases `b`
    // (and not `a`), that copy would overwrite the selector bytes and the shuffle would
    // silently use `a` as its own index vector, so reject that register assignment.
    ASSERT(a == dest || b != dest);
    if (a != dest)
        m_assembler.movapd_rr(a, dest);
    m_assembler.pshufb_rr(b, dest);
}

// Placeholder: the body does nothing besides marking every argument as unused.
void vectorShuffle(TrustedImm64 immLow, TrustedImm64 immHigh, FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
    UNUSED_PARAM(immLow);
    UNUSED_PARAM(immHigh);
    UNUSED_PARAM(a);
    UNUSED_PARAM(b);
    UNUSED_PARAM(dest);
}

// Misc helper functions.

0 comments on commit cef394f

Please sign in to comment.