diff --git a/lib/Backend/IRBuilderAsmJs.cpp b/lib/Backend/IRBuilderAsmJs.cpp index dfc13b921b1..61b59dd4935 100644 --- a/lib/Backend/IRBuilderAsmJs.cpp +++ b/lib/Backend/IRBuilderAsmJs.cpp @@ -6,6 +6,7 @@ #include "Backend.h" #ifdef ASMJS_PLAT #include "ByteCode/OpCodeUtilAsmJs.h" +#include "../../WasmReader/WasmParseTree.h" void IRBuilderAsmJs::Build() @@ -1954,6 +1955,7 @@ IRBuilderAsmJs::BuildAsmReg1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSl #define Bool32x4Proc(v) GetRegSlotFromSimd128Reg(v) #define Int32x4Proc(v) GetRegSlotFromSimd128Reg(v) #define Float64x2Proc(v) GetRegSlotFromSimd128Reg(v) +#define Int64x2Proc(v) GetRegSlotFromSimd128Reg(v) #define Int16x8Proc(v) GetRegSlotFromSimd128Reg(v) #define Bool16x8Proc(v) GetRegSlotFromSimd128Reg(v) #define Int8x16Proc(v) GetRegSlotFromSimd128Reg(v) @@ -2250,6 +2252,17 @@ IRBuilderAsmJs::BuildReg1Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegS } } +void +IRBuilderAsmJs::BuildFloat32x4_IntConst4(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, int C1, int C2, int C3, int C4) +{ + Assert(newOpcode == Js::OpCodeAsmJs::Simd128_LdC); + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128F4); + dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); + SIMDValue simdConst{ C1, C2, C3, C4 }; + IR::Instr * instr = IR::Instr::New(Js::OpCode::Simd128_LdC, dstOpnd, IR::Simd128ConstOpnd::New(simdConst, TySimd128F4, m_func), m_func); + AddInstr(instr, offset); +} + void IRBuilderAsmJs::BuildInt1Const1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, int constInt) { @@ -4007,6 +4020,37 @@ IRBuilderAsmJs::BuildInt32x4_3(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_S BuildSimd_3(newOpcode, offset, dstRegSlot, src1RegSlot, src2RegSlot, TySimd128I4); } +void +IRBuilderAsmJs::BuildInt32x4_4(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG4) +{ + ValueType valueType = GetSimdValueTypeFromIRType(TySimd128I4); + IR::RegOpnd * src1Opnd = 
BuildSrcOpnd(src1RegSlot, TySimd128I4); + src1Opnd->SetValueType(valueType); + + IR::RegOpnd * src2Opnd = BuildSrcOpnd(src2RegSlot, TySimd128I4); + src2Opnd->SetValueType(valueType); + + IR::RegOpnd * mask = BuildSrcOpnd(src3RegSlot, TySimd128I4); + mask->SetValueType(valueType); + + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128I4); + dstOpnd->SetValueType(GetSimdValueTypeFromIRType(TySimd128I4)); + + Js::OpCode opcode; + + opcode = GetSimdOpcode(newOpcode); + + AssertMsg((uint32)opcode, "Invalid backend SIMD opcode"); + + IR::Instr* instr = nullptr; + + instr = AddExtendedArg(src1Opnd, nullptr, offset); + instr = AddExtendedArg(src2Opnd, instr->GetDst()->AsRegOpnd(), offset); + instr = AddExtendedArg(mask, instr->GetDst()->AsRegOpnd(), offset); + + AddInstr(IR::Instr::New(opcode, dstOpnd, instr->GetDst(), m_func), offset); +} + void IRBuilderAsmJs::BuildBool32x4_1Int32x4_2(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG3) { @@ -4565,32 +4609,144 @@ void IRBuilderAsmJs::BuildReg1Int8x16_1(Js::OpCodeAsmJs newOpcode, uint32 offset } } +/* Int64x2 */ +void +IRBuilderAsmJs::BuildInt64x2_1Long1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG2) +{ + Assert(newOpcode == Js::OpCodeAsmJs::Simd128_Splat_I2); + IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TyInt64); + src1Opnd->SetValueType(ValueType::GetInt(false)); + + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128I2); + dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Int64x2)); + + Js::OpCode opcode = GetSimdOpcode(newOpcode); + AssertMsg((uint32)opcode, "Invalid backend SIMD opcode"); + + IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, m_func); + AddInstr(instr, offset); +} + +void +IRBuilderAsmJs::BuildInt1Bool64x2_1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG2) +{ + Assert(newOpcode == Js::OpCodeAsmJs::Simd128_AnyTrue_B2 || newOpcode == Js::OpCodeAsmJs::Simd128_AllTrue_B2); + IR::RegOpnd * src1Opnd = 
BuildSrcOpnd(src1RegSlot, TySimd128I2); + src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Int64x2)); + + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TyInt32); + dstOpnd->SetValueType(ValueType::GetInt(false)); + + Js::OpCode opcode = GetSimdOpcode(newOpcode); + AssertMsg((uint32)opcode, "Invalid backend SIMD opcode"); + + IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, m_func); + AddInstr(instr, offset); +} + +void +IRBuilderAsmJs::BuildLong1Int64x2_1Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG3) +{ + Assert(newOpcode == Js::OpCodeAsmJs::Simd128_ExtractLane_I2); + IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TySimd128I2); + src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Int64x2)); + + IR::RegOpnd * src2Opnd = BuildSrcOpnd(src2RegSlot, TyInt32); + src2Opnd->SetValueType(ValueType::GetInt(false)); + + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TyInt64); + dstOpnd->SetValueType(ValueType::GetInt(false)); + + Js::OpCode opcode = GetSimdOpcode(newOpcode); + AssertMsg((uint32)opcode, "Invalid backend SIMD opcode"); + + IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, src2Opnd, m_func); + AddInstr(instr, offset); +} + +void +IRBuilderAsmJs::BuildInt64x2_2_Int1_Long1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG4) +{ + Assert(newOpcode == Js::OpCodeAsmJs::Simd128_ReplaceLane_I2); + BuildSimd_2Int2(newOpcode, offset, dstRegSlot, src1RegSlot, src2RegSlot, src3RegSlot, TySimd128I2, TyInt64); +} + +void +IRBuilderAsmJs::BuildInt64x2_2Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG3) +{ + Assert(newOpcode == Js::OpCodeAsmJs::Simd128_ShLtByScalar_I2 || + newOpcode == Js::OpCodeAsmJs::Simd128_ShRtByScalar_I2 || + newOpcode == Js::OpCodeAsmJs::Simd128_ShRtByScalar_U2 + ); + BuildSimd_2Int1(newOpcode, offset, dstRegSlot, src1RegSlot, src2RegSlot, TySimd128I2); +} + +void +IRBuilderAsmJs::BuildInt64x2_3(Js::OpCodeAsmJs newOpcode, uint32 offset, 
BUILD_SIMD_ARGS_REG3) +{ + Assert(newOpcode == Js::OpCodeAsmJs::Simd128_Add_I2 || newOpcode == Js::OpCodeAsmJs::Simd128_Sub_I2); + BuildSimd_3(newOpcode, offset, dstRegSlot, src1RegSlot, src2RegSlot, TySimd128F4); +} + +void +IRBuilderAsmJs::BuildInt64x2_2(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG2) +{ + Assert(newOpcode == Js::OpCodeAsmJs::Simd128_Neg_I2 || + newOpcode == Js::OpCodeAsmJs::Simd128_FromUint64x2_D2 || + newOpcode == Js::OpCodeAsmJs::Simd128_FromInt64x2_D2 || + newOpcode == Js::OpCodeAsmJs::Simd128_FromFloat64x2_I2 || + newOpcode == Js::OpCodeAsmJs::Simd128_FromFloat64x2_U2); + BuildSimd_2(newOpcode, offset, dstRegSlot, src1RegSlot, TySimd128I2); +} + /* Float64x2 */ -// Disabled for now -#if 0 +void +IRBuilderAsmJs::BuildDouble1Float64x2_1Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG3) +{ + IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TySimd128F4); + src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); + + IR::RegOpnd * src2Opnd = BuildSrcOpnd(src2RegSlot, TyInt32); + src2Opnd->SetValueType(ValueType::GetInt(false)); + + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TyFloat64); + dstOpnd->SetValueType(ValueType::Float); + + Js::OpCode opcode = GetSimdOpcode(newOpcode); + + AssertMsg((opcode == Js::OpCode::Simd128_ExtractLane_D2), "Unexpected opcode for this format."); + + IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, src2Opnd, m_func); + AddInstr(instr, offset); +} + +void IRBuilderAsmJs::BuildFloat64x2_1Double1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG2) +{ + IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TyFloat64); + src1Opnd->SetValueType(ValueType::Float); + + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128F4); + dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); + + Js::OpCode opcode = GetSimdOpcode(newOpcode); + + AssertMsg(opcode == Js::OpCode::Simd128_Splat_D2, "Invalid backend SIMD opcode"); + 
IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, m_func); + AddInstr(instr, offset); +} + void IRBuilderAsmJs::BuildFloat64x2_2(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG2) { - IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TySimd128D2); - src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float64x2)); + IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TySimd128F4); + src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); - IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128D2); - dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float64x2)); + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128F4); + dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); Js::OpCode opcode; - switch (newOpcode) - { - case Js::OpCodeAsmJs::Simd128_Return_D2: - CheckJitLoopReturn(dstRegSlot, TySimd128D2); - opcode = Js::OpCode::Ld_A; - break; - case Js::OpCodeAsmJs::Simd128_Ld_D2: - opcode = Js::OpCode::Ld_A; - break; - default: - opcode = GetSimdOpcode(newOpcode); - } + opcode = GetSimdOpcode(newOpcode); AssertMsg((uint32)opcode, "Invalid backend SIMD opcode"); @@ -4601,14 +4757,14 @@ IRBuilderAsmJs::BuildFloat64x2_2(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD void IRBuilderAsmJs::BuildFloat64x2_3(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG3) { - IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TySimd128D2); - src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float64x2)); + IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TySimd128F4); + src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); - IR::RegOpnd * src2Opnd = BuildSrcOpnd(src2RegSlot, TySimd128D2); - src2Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float64x2)); + IR::RegOpnd * src2Opnd = BuildSrcOpnd(src2RegSlot, TySimd128F4); + src2Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); - IR::RegOpnd * dstOpnd = 
BuildDstOpnd(dstRegSlot, TySimd128D2); - dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float64x2)); + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128F4); + dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); Js::OpCode opcode; @@ -4620,6 +4776,40 @@ IRBuilderAsmJs::BuildFloat64x2_3(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD AddInstr(instr, offset); } +void +IRBuilderAsmJs::BuildFloat64x2_2Int1Double1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG4) +{ + IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TySimd128F4); + src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); + + IR::RegOpnd * src2Opnd = BuildSrcOpnd(src2RegSlot, TyInt32); + src2Opnd->SetValueType(ValueType::GetInt(false)); + + IR::RegOpnd * src3Opnd = BuildSrcOpnd(src3RegSlot, TyFloat64); + src3Opnd->SetValueType(ValueType::Float); + + IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128F4); + dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); + + // Given bytecode: dst = op s1, s2, s3 + // Generate: + // t1 = ExtendedArg_A s1 + // t2 = ExtendedArg_A s2, t1 + // t3 = ExtendedArg_A s3, t2 + // dst = op t3 + + IR::Instr* instr = nullptr; + + instr = AddExtendedArg(src1Opnd, nullptr, offset); + instr = AddExtendedArg(src2Opnd, instr->GetDst()->AsRegOpnd(), offset); + instr = AddExtendedArg(src3Opnd, instr->GetDst()->AsRegOpnd(), offset); + + Js::OpCode opcode = GetSimdOpcode(newOpcode); + AssertMsg((opcode == Js::OpCode::Simd128_ReplaceLane_D2), "Unexpected opcode for this format."); + AddInstr(IR::Instr::New(opcode, dstOpnd, instr->GetDst(), m_func), offset); +} +// Disabled for now +#if 0 void IRBuilderAsmJs::BuildFloat64x2_4(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG4) { @@ -4670,20 +4860,7 @@ void IRBuilderAsmJs::BuildFloat64x2_1Double2(Js::OpCodeAsmJs newOpcode, uint32 o AddInstr(instr, offset); } -void 
IRBuilderAsmJs::BuildFloat64x2_1Double1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG2) -{ - IR::RegOpnd * src1Opnd = BuildSrcOpnd(src1RegSlot, TyFloat64); - src1Opnd->SetValueType(ValueType::Float); - - IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128D2); - dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float64x2)); - Js::OpCode opcode = GetSimdOpcode(newOpcode); - - AssertMsg(opcode == Js::OpCode::Simd128_Splat_D2, "Invalid backend SIMD opcode"); - IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, m_func); - AddInstr(instr, offset); -} void IRBuilderAsmJs::BuildFloat64x2_2Double1(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG3) @@ -5730,6 +5907,33 @@ void IRBuilderAsmJs::BuildUint8x16_2Int16(Js::OpCodeAsmJs newOpcode, uint32 offs AddInstr(IR::Instr::New(opcode, dstOpnd, instr->GetDst(), m_func), offset); } + +template +void +IRBuilderAsmJs::BuildAsmShuffle(Js::OpCodeAsmJs newOpcode, uint32 offset) +{ + Assert(OpCodeAttrAsmJs::HasMultiSizeLayout(newOpcode) && newOpcode == Js::OpCodeAsmJs::Simd128_Shuffle_V8X16); + auto layout = m_jnReader.GetLayout>(); + + IR::RegOpnd * dstOpnd = BuildDstOpnd(GetRegSlotFromSimd128Reg(layout->R0), TySimd128U16); + IR::RegOpnd * src1Opnd = BuildSrcOpnd(GetRegSlotFromSimd128Reg(layout->R1), TySimd128U16); + IR::RegOpnd * src2Opnd = BuildSrcOpnd(GetRegSlotFromSimd128Reg(layout->R2), TySimd128U16); + dstOpnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Uint8x16)); + src1Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Uint8x16)); + src2Opnd->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Uint8x16)); + + IR::Instr * instr = nullptr; + instr = AddExtendedArg(src1Opnd, nullptr, offset); + instr = AddExtendedArg(src2Opnd, instr->GetDst()->AsRegOpnd(), offset); + + for (uint i = 0; i < Wasm::Simd::MAX_LANES; i++) + { + IR::RegOpnd* shuffleOpnd = (IR::RegOpnd*)IR::IntConstOpnd::New(layout->INDICES[i], TyInt32, this->m_func); + instr = 
AddExtendedArg(shuffleOpnd, instr->GetDst()->AsRegOpnd(), offset); + } + AddInstr(IR::Instr::New(Js::OpCode::Simd128_Shuffle_U16, dstOpnd, instr->GetDst(), m_func), offset); +} + void IRBuilderAsmJs::BuildUint8x16_3Int16(Js::OpCodeAsmJs newOpcode, uint32 offset, BUILD_SIMD_ARGS_REG19) { IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, TySimd128U16); @@ -6238,7 +6442,7 @@ void IRBuilderAsmJs::BuildSimd_1Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, J AddInstr(instr, offset); } -void IRBuilderAsmJs::BuildSimd_2Int2(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, Js::RegSlot src2RegSlot, Js::RegSlot src3RegSlot, IRType simdType) +void IRBuilderAsmJs::BuildSimd_2Int2(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, Js::RegSlot src2RegSlot, Js::RegSlot src3RegSlot, IRType simdType, IRType valType) { ValueType valueType = GetSimdValueTypeFromIRType(simdType); @@ -6248,7 +6452,7 @@ void IRBuilderAsmJs::BuildSimd_2Int2(Js::OpCodeAsmJs newOpcode, uint32 offset, J IR::RegOpnd * src2Opnd = BuildSrcOpnd(src2RegSlot, TyInt32); src2Opnd->SetValueType(ValueType::GetInt(false)); - IR::RegOpnd * src3Opnd = BuildSrcOpnd(src3RegSlot, TyInt32); + IR::RegOpnd * src3Opnd = BuildSrcOpnd(src3RegSlot, valType); src3Opnd->SetValueType(ValueType::GetInt(false)); IR::RegOpnd * dstOpnd = BuildDstOpnd(dstRegSlot, simdType); @@ -6313,8 +6517,24 @@ void IRBuilderAsmJs::BuildSimd_2(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::R AssertMsg((uint32)opcode, "Invalid backend SIMD opcode"); - IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, m_func); - AddInstr(instr, offset); + if (newOpcode == Js::OpCodeAsmJs::Simd128_Neg_I2) + { + SIMDValue zeroVec{ 0 }; + IR::Opnd* zeroConst = IR::Simd128ConstOpnd::New(zeroVec, TySimd128F4, m_func); + IR::RegOpnd* tmpReg = IR::RegOpnd::New(TyMachSimd128F4, m_func); + tmpReg->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4)); + IR::Instr * instr = 
IR::Instr::New(Js::OpCode::Simd128_LdC, tmpReg, zeroConst, m_func); + AddInstr(instr, offset); + instr = IR::Instr::New(Js::OpCode::Simd128_Sub_I2, dstOpnd, tmpReg, src1Opnd, m_func); + AddInstr(instr, offset); + + } + else + { + IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, m_func); + AddInstr(instr, offset); + } + } void IRBuilderAsmJs::BuildSimd_2Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, Js::RegSlot src2RegSlot, IRType simdType) @@ -6410,6 +6630,8 @@ ValueType IRBuilderAsmJs::GetSimdValueTypeFromIRType(IRType type) return ValueType::GetSimd128(ObjectType::Simd128Float32x4); case TySimd128D2: return ValueType::GetSimd128(ObjectType::Simd128Float64x2); + case TySimd128I2: + return ValueType::GetSimd128(ObjectType::Simd128Int64x2); case TySimd128I4: return ValueType::GetSimd128(ObjectType::Simd128Int32x4); case TySimd128I8: @@ -6469,11 +6691,11 @@ void IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offs { Assert(OpCodeAttrAsmJs::HasMultiSizeLayout(newOpcode)); auto layout = m_jnReader.GetLayout>(); - BuildAsmSimdTypedArr(newOpcode, offset, layout->SlotIndex, layout->Value, layout->ViewType, layout->DataWidth); + BuildAsmSimdTypedArr(newOpcode, offset, layout->SlotIndex, layout->Value, layout->ViewType, layout->DataWidth, layout->Offset); } void -IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth) +IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth, uint32 simdOffset) { IRType type = TySimd128F4; Js::RegSlot valueRegSlot = GetRegSlotFromSimd128Reg(value); @@ -6801,6 +7023,7 @@ IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, u // REVIEW: Store dataWidth in the instruction itself instead of an argument to 
avoid using ExtendedArgs or excessive opcodes. Assert(dataWidth >= 4 && dataWidth <= 16); instr->dataWidth = dataWidth; + indirOpnd->SetOffset(simdOffset); if (maskInstr) { AddInstr(maskInstr, offset); diff --git a/lib/Backend/IRBuilderAsmJs.h b/lib/Backend/IRBuilderAsmJs.h index 8dcd5f6da1d..f73725e68c6 100644 --- a/lib/Backend/IRBuilderAsmJs.h +++ b/lib/Backend/IRBuilderAsmJs.h @@ -180,7 +180,7 @@ class IRBuilderAsmJs #include "ByteCode/LayoutTypesAsmJs.h" void BuildSimd_1Ints(Js::OpCodeAsmJs newOpcode, uint32 offset, IRType dstSimdType, Js::RegSlot* srcRegSlots, Js::RegSlot dstRegSlot, uint LANES); void BuildSimd_1Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, IRType simdType); - void BuildSimd_2Int2(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, Js::RegSlot src2RegSlot, Js::RegSlot src3RegSlot, IRType simdType); + void BuildSimd_2Int2(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, Js::RegSlot src2RegSlot, Js::RegSlot src3RegSlot, IRType simdType, IRType valType = TyInt32); void BuildSimd_2(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, IRType simdType); void BuildSimd_2Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, Js::RegSlot src2RegSlot, IRType simdType); void BuildSimd_3(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, Js::RegSlot src1RegSlot, Js::RegSlot src2RegSlot, IRType simdType); @@ -193,7 +193,7 @@ class IRBuilderAsmJs void BuildWasmLoopStart(Js::OpCodeAsmJs newOpcode, uint offset); void BuildWasmMemAccess(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, uint32 constOffset, Js::ArrayBufferView::ViewType viewType); void BuildAsmTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType); - void 
BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType, uint8 DataWidth); + void BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, Js::ArrayBufferView::ViewType viewType, uint8 DataWidth, uint32 simdOffset); void BuildAsmCall(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::ArgSlot argCount, Js::RegSlot ret, Js::RegSlot function, int8 returnType, Js::ProfileId profileId); void BuildAsmReg1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstReg); void BuildBrInt1(Js::OpCodeAsmJs newOpcode, uint32 offset, int32 relativeOffset, Js::RegSlot src); @@ -266,6 +266,7 @@ class IRBuilderAsmJs #define Bool32x4_Type Js::RegSlot #define Int32x4_Type Js::RegSlot #define Float64x2_Type Js::RegSlot +#define Int64x2_Type Js::RegSlot #define Int16x8_Type Js::RegSlot #define Bool16x8_Type Js::RegSlot #define Int8x16_Type Js::RegSlot diff --git a/lib/Backend/IRTypeList.h b/lib/Backend/IRTypeList.h index 7d0bf1d9aee..1627ff7ec40 100644 --- a/lib/Backend/IRTypeList.h +++ b/lib/Backend/IRTypeList.h @@ -32,6 +32,7 @@ IRTYPE(Simd128U16, Simd, 16, b(16), 1, simd128) IRTYPE(Simd128B4, Simd, 16, b(16), 1, simd128) IRTYPE(Simd128B8, Simd, 16, b(16), 1, simd128) IRTYPE(Simd128B16, Simd, 16, b(16), 1, simd128) +IRTYPE(Simd128I2, Simd, 16, b(16), 1, simd128) IRTYPE(Simd128D2, Simd, 16, b(16), 1, simd128) // SIMD end diff --git a/lib/Backend/JnHelperMethodList.h b/lib/Backend/JnHelperMethodList.h index 7ffcacdd452..af7aa740b69 100644 --- a/lib/Backend/JnHelperMethodList.h +++ b/lib/Backend/JnHelperMethodList.h @@ -347,6 +347,18 @@ HELPERCALL(AllocUninitializedNumber, Js::JavascriptOperators::AllocUninitialized // SIMD_JS HELPERCALL(AllocUninitializedSimdF4, Js::JavascriptSIMDFloat32x4::AllocUninitialized, 0) HELPERCALL(AllocUninitializedSimdI4, Js::JavascriptSIMDInt32x4::AllocUninitialized, 0) + +#endif + +#ifdef ENABLE_WASM_SIMD 
+HELPERCALL(Simd128ShRtByScalarU2, Js::SIMDInt64x2Operation::OpShiftRightByScalarU, 0) +HELPERCALL(Simd128ShRtByScalarI2, Js::SIMDInt64x2Operation::OpShiftRightByScalar, 0) +HELPERCALL(Simd128ShLtByScalarI2, Js::SIMDInt64x2Operation::OpShiftLeftByScalar, 0) +HELPERCALL(Simd128ReplaceLaneI2, Js::SIMDInt64x2Operation::OpReplaceLane, 0) +HELPERCALL(Simd128TruncateI2, (void(*)(SIMDValue*, SIMDValue*))&Js::SIMDInt64x2Operation::OpTrunc, AttrCanThrow) +HELPERCALL(Simd128TruncateU2, (void(*)(SIMDValue*, SIMDValue*))&Js::SIMDInt64x2Operation::OpTrunc, AttrCanThrow) +HELPERCALL(Simd128ConvertSD2, (void(*)(SIMDValue*, SIMDValue*))&Js::SIMDFloat64x2Operation::OpConv, 0) +HELPERCALL(Simd128ConvertUD2, (void(*)(SIMDValue*, SIMDValue*))&Js::SIMDFloat64x2Operation::OpConv, 0) #endif HELPERCALL(Op_TryCatch, nullptr, 0) diff --git a/lib/Backend/Lifetime.h b/lib/Backend/Lifetime.h index 65da230114b..b45ebcff113 100644 --- a/lib/Backend/Lifetime.h +++ b/lib/Backend/Lifetime.h @@ -14,6 +14,7 @@ class Lifetime useList(alloc), lastUseLabel(NULL), region(NULL), isSpilled(false), useCount(0), useCountAdjust(0), allDefsCost(0), isLiveAcrossCalls(false), isLiveAcrossUserCalls(false), isDeadStore(true), isOpHelperSpilled(false), cantOpHelperSpill(false), isOpHelperSpillAsArg(false), isFloat(0), cantSpill(false), dontAllocate(false), isSecondChanceAllocated(false), isCheapSpill(false), spillStackSlot(NULL), + totalOpHelperLengthByEnd(0), needsStoreCompensation(false), alloc(alloc), regionUseCount(NULL), regionUseCountAdjust(NULL), cantStackPack(false) { diff --git a/lib/Backend/Lower.cpp b/lib/Backend/Lower.cpp index 8cfd488823a..5cc0980a30b 100644 --- a/lib/Backend/Lower.cpp +++ b/lib/Backend/Lower.cpp @@ -3019,7 +3019,7 @@ Lowerer::LowerRange(IR::Instr *instrStart, IR::Instr *instrEnd, bool defaultDoFa #endif //ENABLE_WASM default: -#ifdef ENABLE_SIMDJS +#if defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) #if defined(_M_IX86) || defined(_M_X64) if (IsSimd128Opcode(instr->m_opcode)) { 
diff --git a/lib/Backend/LowerMDShared.cpp b/lib/Backend/LowerMDShared.cpp index 9201f4dce4e..bffd10f42ca 100644 --- a/lib/Backend/LowerMDShared.cpp +++ b/lib/Backend/LowerMDShared.cpp @@ -504,7 +504,7 @@ LowererMD::Init(Lowerer *lowerer) { m_lowerer = lowerer; this->lowererMDArch.Init(this); -#ifdef ENABLE_SIMDJS +#if defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) Simd128InitOpcodeMap(); #endif } @@ -858,6 +858,9 @@ LowererMD::LowerRet(IR::Instr * retInstr) case Js::AsmJsRetType::Float64x2: regType = TySimd128D2; break; + case Js::AsmJsRetType::Int64x2: + regType = TySimd128I2; + break; case Js::AsmJsRetType::Int16x8: regType = TySimd128I8; break; @@ -1721,6 +1724,7 @@ LowererMD::Legalize(IR::Instr *const instr, bool fPostRegAlloc) case Js::OpCode::PADDB: case Js::OpCode::PADDSB: case Js::OpCode::PADDD: + case Js::OpCode::PADDQ: case Js::OpCode::PADDW: case Js::OpCode::PADDSW: case Js::OpCode::PADDUSB: @@ -1743,6 +1747,7 @@ LowererMD::Legalize(IR::Instr *const instr, bool fPostRegAlloc) case Js::OpCode::PSUBB: case Js::OpCode::PSUBSB: case Js::OpCode::PSUBD: + case Js::OpCode::PSUBQ: case Js::OpCode::PSUBW: case Js::OpCode::PSUBSW: case Js::OpCode::PSUBUSB: @@ -1833,10 +1838,12 @@ LowererMD::Legalize(IR::Instr *const instr, bool fPostRegAlloc) case Js::OpCode::PSLLDQ: case Js::OpCode::PSRLW: case Js::OpCode::PSRLD: + case Js::OpCode::PSRLQ: case Js::OpCode::PSRAW: case Js::OpCode::PSRAD: case Js::OpCode::PSLLW: case Js::OpCode::PSLLD: + case Js::OpCode::PSLLQ: Assert(AutoSystemInfo::Data.SSE2Available()); MakeDstEquSrc1(instr); diff --git a/lib/Backend/LowerMDShared.h b/lib/Backend/LowerMDShared.h index c6dda998bf8..694fbb8b763 100644 --- a/lib/Backend/LowerMDShared.h +++ b/lib/Backend/LowerMDShared.h @@ -262,6 +262,7 @@ class LowererMD IR::Instr * LowerCallI(IR::Instr * callInstr, ushort callFlags, bool isHelper = false, IR::Instr * insertBeforeInstrForCFG = nullptr); IR::Instr * LoadInt64HelperArgument(IR::Instr * instr, IR::Opnd* opnd); IR::Instr * 
LoadHelperArgument(IR::Instr * instr, IR::Opnd * opndArg); + IR::MemRefOpnd * LoadSimdHelperArgument(IR::Instr * instr, uint8 index); IR::Instr * LoadDoubleHelperArgument(IR::Instr * instr, IR::Opnd * opndArg); IR::Instr * LoadFloatHelperArgument(IR::Instr * instr, IR::Opnd * opndArg); IR::Instr * LowerEntryInstr(IR::EntryInstr * entryInstr); @@ -318,24 +319,35 @@ class LowererMD static IR::Instr * InsertCmovCC(const Js::OpCode opCode, IR::Opnd * dst, IR::Opnd* src1, IR::Instr* insertBeforeInstr, bool postRegAlloc = false); #ifdef ENABLE_SIMDJS + IR::Instr* Simd128LowerConstructor_2(IR::Instr *instr); + IR::Instr* Simd128LowerConstructor_4(IR::Instr *instr); + IR::Instr* Simd128LowerConstructor_8(IR::Instr *instr); + IR::Instr* Simd128LowerConstructor_16(IR::Instr *instr); + IR::Instr* Simd128LowerRcp(IR::Instr *instr, bool removeInstr = true); + IR::Instr* Simd128LowerRcpSqrt(IR::Instr *instr); + void GenerateCheckedSimdLoad(IR::Instr * instr); + void GenerateSimdStore(IR::Instr * instr); + IR::Instr* Simd128LowerSelect(IR::Instr *instr); +#endif + +#if defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) void Simd128InitOpcodeMap(); IR::Instr* Simd128Instruction(IR::Instr* instr); IR::Instr* Simd128LoadConst(IR::Instr* instr); + IR::Instr* LowerSimd128BitSelect(IR::Instr* instr); bool Simd128TryLowerMappedInstruction(IR::Instr *instr); IR::Instr* Simd128LowerUnMappedInstruction(IR::Instr *instr); - IR::Instr* Simd128LowerConstructor_2(IR::Instr *instr); - IR::Instr* Simd128LowerConstructor_4(IR::Instr *instr); - IR::Instr* Simd128LowerConstructor_8(IR::Instr *instr); - IR::Instr* Simd128LowerConstructor_16(IR::Instr *instr); IR::Instr* Simd128LowerLdLane(IR::Instr *instr); + IR::Instr* SIMD128LowerReplaceLane_2(IR::Instr *instr); + void EmitExtractInt64(IR::Opnd* dst, IR::Opnd* src, uint index, IR::Instr *instr); + void EmitInsertInt64(IR::Opnd* dst, uint index, IR::Instr *instr); + void 
EmitShiftByScalarI2(IR::Instr *instr, IR::JnHelperMethod helper); + IR::Instr* EmitSimdConversion(IR::Instr *instr, IR::JnHelperMethod helper); IR::Instr* SIMD128LowerReplaceLane_4(IR::Instr *instr); IR::Instr* SIMD128LowerReplaceLane_8(IR::Instr *instr); IR::Instr* SIMD128LowerReplaceLane_16(IR::Instr *instr); IR::Instr* Simd128LowerSplat(IR::Instr *instr); - IR::Instr* Simd128LowerRcp(IR::Instr *instr, bool removeInstr = true); IR::Instr* Simd128LowerSqrt(IR::Instr *instr); - IR::Instr* Simd128LowerRcpSqrt(IR::Instr *instr); - IR::Instr* Simd128LowerSelect(IR::Instr *instr); IR::Instr* Simd128LowerNeg(IR::Instr *instr); IR::Instr* Simd128LowerMulI4(IR::Instr *instr); IR::Instr* Simd128LowerShift(IR::Instr *instr); @@ -359,15 +371,16 @@ class LowererMD IR::Instr* Simd128LowerLessThanOrEqual(IR::Instr* instr); IR::Instr* Simd128LowerGreaterThanOrEqual(IR::Instr* instr); IR::Instr* Simd128LowerMinMax_F4(IR::Instr* instr); - IR::Instr* Simd128LowerMinMaxNum(IR::Instr* instr); IR::Instr* Simd128LowerAnyTrue(IR::Instr* instr); IR::Instr* Simd128LowerAllTrue(IR::Instr* instr); +#ifdef ENABLE_WASM_SIMD + IR::Opnd* Simd128CanonicalizeToBoolsBeforeReduction(IR::Instr* instr); +#endif BYTE Simd128GetTypedArrBytesPerElem(ValueType arrType); IR::Instr* Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode& cmpOpcode, IR::Opnd& dstOpnd); IR::Opnd* EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type = TyInt32); + IR::Opnd* EnregisterBoolConst(IR::Instr* instr, IR::Opnd *opnd, IRType type); SList * Simd128GetExtendedArgs(IR::Instr *instr); - void GenerateCheckedSimdLoad(IR::Instr * instr); - void GenerateSimdStore(IR::Instr * instr); void CheckShuffleLanes_4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2); void InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *insertBeforeInstr); #endif diff --git a/lib/Backend/LowerMDSharedSimd128.cpp b/lib/Backend/LowerMDSharedSimd128.cpp index 
1b621603d2b..c8d11537fdb 100644 --- a/lib/Backend/LowerMDSharedSimd128.cpp +++ b/lib/Backend/LowerMDSharedSimd128.cpp @@ -5,7 +5,448 @@ #include "Backend.h" +static IR::Instr* removeInstr(IR::Instr* instr) +{ + IR::Instr* prevInstr; + prevInstr = instr->m_prev; + instr->Remove(); + return prevInstr; +} + #ifdef ENABLE_SIMDJS +// FromVar +void +LowererMD::GenerateCheckedSimdLoad(IR::Instr * instr) +{ + Assert(instr->m_opcode == Js::OpCode::FromVar); + Assert(instr->GetSrc1()->GetType() == TyVar); + Assert(IRType_IsSimd128(instr->GetDst()->GetType())); + + bool checkRequired = instr->HasBailOutInfo(); + IR::LabelInstr * labelHelper = nullptr, *labelDone = nullptr; + IR::Instr * insertInstr = instr, *newInstr; + IR::RegOpnd * src = instr->GetSrc1()->AsRegOpnd(), *dst = instr->GetDst()->AsRegOpnd(); + Assert(!checkRequired || instr->GetBailOutKind() == IR::BailOutSimd128F4Only || instr->GetBailOutKind() == IR::BailOutSimd128I4Only); + + if (checkRequired) + { + labelHelper = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true); + labelDone = IR::LabelInstr::New(Js::OpCode::Label, this->m_func); + instr->InsertBefore(labelHelper); + instr->InsertAfter(labelDone); + insertInstr = labelHelper; + + GenerateObjectTest(instr->GetSrc1(), insertInstr, labelHelper); + + newInstr = IR::Instr::New(Js::OpCode::CMP, instr->m_func); + newInstr->SetSrc1(IR::IndirOpnd::New(instr->GetSrc1()->AsRegOpnd(), 0, TyMachPtr, instr->m_func)); + newInstr->SetSrc2(m_lowerer->LoadVTableValueOpnd(instr, dst->GetType() == TySimd128F4 ? VTableValue::VtableSimd128F4 : VTableValue::VtableSimd128I4)); + insertInstr->InsertBefore(newInstr); + Legalize(newInstr); + insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, labelHelper, this->m_func)); + instr->UnlinkSrc1(); + instr->UnlinkDst(); + this->m_lowerer->GenerateBailOut(instr); + + } + size_t valueOffset = dst->GetType() == TySimd128F4 ? 
Js::JavascriptSIMDFloat32x4::GetOffsetOfValue() : Js::JavascriptSIMDInt32x4::GetOffsetOfValue(); + Assert(valueOffset < INT_MAX); + newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::IndirOpnd::New(src, static_cast(valueOffset), dst->GetType(), this->m_func), this->m_func); + insertInstr->InsertBefore(newInstr); + + insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, this->m_func)); + // FromVar is converted to BailOut call. Don't remove. +} + +// ToVar +void LowererMD::GenerateSimdStore(IR::Instr * instr) +{ + IR::RegOpnd *dst, *src; + IRType type; + dst = instr->GetDst()->AsRegOpnd(); + src = instr->GetSrc1()->AsRegOpnd(); + type = src->GetType(); + + this->m_lowerer->LoadScriptContext(instr); + IR::Instr * instrCall = IR::Instr::New(Js::OpCode::CALL, instr->GetDst(), + IR::HelperCallOpnd::New(type == TySimd128F4 ? IR::HelperAllocUninitializedSimdF4 : IR::HelperAllocUninitializedSimdI4, this->m_func), this->m_func); + instr->InsertBefore(instrCall); + this->lowererMDArch.LowerCall(instrCall, 0); + + IR::Opnd * valDst; + if (type == TySimd128F4) + { + valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDFloat32x4::GetOffsetOfValue(), TySimd128F4, this->m_func); + } + else + { + valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDInt32x4::GetOffsetOfValue(), TySimd128I4, this->m_func); + } + + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVUPS, valDst, src, this->m_func)); + instr->Remove(); +} + +IR::Instr* LowererMD::Simd128LowerRcpSqrt(IR::Instr *instr) +{ + Js::OpCode opcode = Js::OpCode::SQRTPS; + Simd128LowerRcp(instr, false); + + opcode = Js::OpCode::SQRTPS; + +#if 0 + else + { + Assert(instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2); + opcode = Js::OpCode::SQRTPD; + } +#endif // 0 + + instr->InsertBefore(IR::Instr::New(opcode, instr->GetDst(), instr->GetDst(), m_func)); + IR::Instr* prevInstr = instr->m_prev; + instr->Remove(); + return prevInstr; +} + +IR::Instr* LowererMD::Simd128LowerRcp(IR::Instr *instr, bool 
removeInstr) +{ + Js::OpCode opcode = Js::OpCode::DIVPS; + IR::Opnd *dst, *src1; + dst = instr->GetDst(); + src1 = instr->GetSrc1(); + + Assert(dst && dst->IsRegOpnd()); + Assert(src1 && src1->IsRegOpnd()); + Assert(instr->GetSrc2() == nullptr); + Assert(src1->IsSimd128F4() || src1->IsSimd128I4()); + opcode = Js::OpCode::DIVPS; + +#if 0 + { + Assert(instr->m_opcode == Js::OpCode::Simd128_Rcp_D2 || instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2); + Assert(src1->IsSimd128D2()); + opcode = Js::OpCode::DIVPD; + x86_allones_mask = (void*)(&X86_ALL_ONES_D2); + } +#endif // 0 + + IR::RegOpnd* tmp = IR::RegOpnd::New(src1->GetType(), m_func); + IR::Instr* movInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllOnesF4Addr(), src1->GetType(), m_func), m_func); + instr->InsertBefore(movInstr); + Legalize(movInstr); + + instr->InsertBefore(IR::Instr::New(opcode, tmp, tmp, src1, m_func)); + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, tmp, m_func)); + if (removeInstr) + { + IR::Instr* prevInstr = instr->m_prev; + instr->Remove(); + return prevInstr; + } + return instr; +} + +IR::Instr* LowererMD::Simd128LowerConstructor_8(IR::Instr *instr) +{ + IR::Opnd* dst = nullptr; + IR::Opnd* srcs[8]; + + //Simd128_IntsToI8/U8/B8 + Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToI8 || instr->m_opcode == Js::OpCode::Simd128_IntsToU8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8); + SList *args = Simd128GetExtendedArgs(instr); + + Assert(args->Count() == 9); + dst = args->Pop(); + + uint i = 0; + while (!args->Empty() && i < 8) + { + srcs[i] = args->Pop(); + // src's might have been constant prop'd. Enregister them if so. + srcs[i] = (instr->m_opcode == Js::OpCode::Simd128_IntsToB8) ? 
+ EnregisterBoolConst(instr, srcs[i], TyInt16) : + EnregisterIntConst(instr, srcs[i], TyInt16); + + Assert(srcs[i]->GetType() == TyInt16 && srcs[i]->IsRegOpnd()); + // PINSRW dst, srcs[i], i + instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRW, dst, srcs[i], IR::IntConstOpnd::New(i, TyInt8, m_func, true), m_func)); + i++; + } + if (instr->m_opcode == Js::OpCode::Simd128_IntsToB8) + { + instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst); + } + IR::Instr* prevInstr; + prevInstr = instr->m_prev; + instr->Remove(); + return prevInstr; +} + +IR::Instr* LowererMD::Simd128LowerConstructor_16(IR::Instr *instr) +{ + IR::Opnd* dst = nullptr; + IR::Opnd* srcs[16]; + //Simd128_IntsToI16/U16/B16 + Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToU16 || instr->m_opcode == Js::OpCode::Simd128_IntsToI16 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16); + SList *args = Simd128GetExtendedArgs(instr); + intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0); +#if DBG + // using only one SIMD temp + intptr_t endAddrSIMD = tempSIMD + sizeof(X86SIMDValue); +#endif + intptr_t address; + IR::Instr * newInstr; + + Assert(args->Count() == 17); + dst = args->Pop(); + + uint i = 0; + while (!args->Empty() && i < 16) + { + srcs[i] = args->Pop(); + // src's might have been constant prop'd. Enregister them if so. + srcs[i] = (instr->m_opcode == Js::OpCode::Simd128_IntsToB16) ? 
+ EnregisterBoolConst(instr, srcs[i], TyInt8) : + EnregisterIntConst(instr, srcs[i], TyInt8); + Assert(srcs[i]->GetType() == TyInt8 && srcs[i]->IsRegOpnd()); + + address = tempSIMD + i; + // check for buffer overrun + Assert((intptr_t)address < endAddrSIMD); + // MOV [temp + i], src[i] (TyInt8) + newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(tempSIMD + i, TyInt8, m_func), srcs[i], m_func); + instr->InsertBefore(newInstr); + Legalize(newInstr); + i++; + } + // MOVUPS dst, [temp] + newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(tempSIMD, TySimd128U16, m_func), m_func); + instr->InsertBefore(newInstr); + Legalize(newInstr); + + if (instr->m_opcode == Js::OpCode::Simd128_IntsToB16) + { + instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst); + } + + IR::Instr* prevInstr; + prevInstr = instr->m_prev; + instr->Remove(); + return prevInstr; +} + +IR::Instr* LowererMD::Simd128LowerConstructor_4(IR::Instr *instr) +{ + IR::Opnd* dst = nullptr; + IR::Opnd* src1 = nullptr; + IR::Opnd* src2 = nullptr; + IR::Opnd* src3 = nullptr; + IR::Opnd* src4 = nullptr; + IR::Instr* newInstr = nullptr; + + Assert(instr->m_opcode == Js::OpCode::Simd128_FloatsToF4 || + instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || + instr->m_opcode == Js::OpCode::Simd128_IntsToI4 || + instr->m_opcode == Js::OpCode::Simd128_IntsToU4); + + // use MOVSS for both int32x4 and float32x4. MOVD zeroes upper bits. + Js::OpCode movOpcode = Js::OpCode::MOVSS; + Js::OpCode shiftOpcode = Js::OpCode::PSLLDQ; + SList *args = Simd128GetExtendedArgs(instr); + + // The number of src opnds should be exact. If opnds are missing, they should be filled in by globopt during type-spec. + Assert(args->Count() == 5); + + dst = args->Pop(); + src1 = args->Pop(); + src2 = args->Pop(); + src3 = args->Pop(); + src4 = args->Pop(); + + if (instr->m_opcode == Js::OpCode::Simd128_FloatsToF4) + { + // We don't have f32 type-spec, so we type-spec to f64 and convert to f32 before use. 
+ if (src1->IsFloat64()) + { + IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); + // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 + newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func); + instr->InsertBefore(newInstr); + src1 = regOpnd32; + } + if (src2->IsFloat64()) + { + IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); + // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 + newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src2, this->m_func); + instr->InsertBefore(newInstr); + src2 = regOpnd32; + } + if (src3->IsFloat64()) + { + IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); + // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 + newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src3, this->m_func); + instr->InsertBefore(newInstr); + src3 = regOpnd32; + } + if (src4->IsFloat64()) + { + IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); + // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 + newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src4, this->m_func); + instr->InsertBefore(newInstr); + src4 = regOpnd32; + } + + Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat32); + Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat32); + Assert(src3->IsRegOpnd() && src3->GetType() == TyFloat32); + Assert(src4->IsRegOpnd() && src4->GetType() == TyFloat32); + + // MOVSS dst, src4 + instr->InsertBefore(IR::Instr::New(movOpcode, dst, src4, m_func)); + // PSLLDQ dst, dst, 4 + instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func)); + // MOVSS dst, src3 + instr->InsertBefore(IR::Instr::New(movOpcode, dst, src3, m_func)); + // PSLLDQ dst, 4 + instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func)); + // MOVSS dst, src2 + 
instr->InsertBefore(IR::Instr::New(movOpcode, dst, src2, m_func)); + // PSLLDQ dst, 4 + instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func)); + // MOVSS dst, src1 + instr->InsertBefore(IR::Instr::New(movOpcode, dst, src1, m_func)); + } + else + { + //Simd128_IntsToI4/U4 + IR::RegOpnd *temp = IR::RegOpnd::New(TyFloat32, m_func); + + // src's might have been constant prop'ed. Enregister them if so. + src4 = EnregisterIntConst(instr, src4); + src3 = EnregisterIntConst(instr, src3); + src2 = EnregisterIntConst(instr, src2); + src1 = EnregisterIntConst(instr, src1); + + Assert(src1->GetType() == TyInt32 && src1->IsRegOpnd()); + Assert(src2->GetType() == TyInt32 && src2->IsRegOpnd()); + Assert(src3->GetType() == TyInt32 && src3->IsRegOpnd()); + Assert(src4->GetType() == TyInt32 && src4->IsRegOpnd()); + + // MOVD t(TyFloat32), src4(TyInt32) + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src4, m_func)); + + // MOVSS dst, t + instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func)); + // PSLLDQ dst, dst, 4 + instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func)); + + // MOVD t(TyFloat32), sr34(TyInt32) + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src3, m_func)); + // MOVSS dst, t + instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func)); + // PSLLDQ dst, dst, 4 + instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func)); + + // MOVD t(TyFloat32), src2(TyInt32) + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src2, m_func)); + // MOVSS dst, t + instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func)); + // PSLLDQ dst, dst, 4 + instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func)); + + // MOVD t(TyFloat32), src1(TyInt32) + 
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src1, m_func)); + // MOVSS dst, t + instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func)); + + if (instr->m_opcode == Js::OpCode::Simd128_IntsToB4) + { + instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst); + } + } + + IR::Instr* prevInstr; + prevInstr = instr->m_prev; + instr->Remove(); + return prevInstr; +} +#if 0 +IR::Instr *LowererMD::Simd128LowerConstructor_2(IR::Instr *instr) +{ + IR::Opnd* dst = nullptr; + IR::Opnd* src1 = nullptr; + IR::Opnd* src2 = nullptr; + + Assert(instr->m_opcode == Js::OpCode::Simd128_DoublesToD2); + dst = instr->GetDst(); + + src1 = instr->GetSrc1(); + src2 = instr->GetSrc2(); + + Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat64); + Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat64); + // MOVSD dst, src2 + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src2, m_func)); + // PSLLDQ dst, dst, 8 + instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, dst, dst, IR::IntConstOpnd::New(TySize[TyFloat64], TyInt8, m_func, true), m_func)); + // MOVSD dst, src1 + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func)); + Assert(dst->IsRegOpnd() && dst->IsSimd128()); + IR::Instr* prevInstr; + prevInstr = instr->m_prev; + instr->Remove(); + return prevInstr; +} +#endif + + +IR::Instr* LowererMD::Simd128LowerSelect(IR::Instr *instr) +{ + Assert(instr->m_opcode == Js::OpCode::Simd128_Select_F4 || instr->m_opcode == Js::OpCode::Simd128_Select_I4 /*|| instr->m_opcode == Js::OpCode::Simd128_Select_D2 */ || + instr->m_opcode == Js::OpCode::Simd128_Select_I8 || instr->m_opcode == Js::OpCode::Simd128_Select_I16 || instr->m_opcode == Js::OpCode::Simd128_Select_U4 || + instr->m_opcode == Js::OpCode::Simd128_Select_U8 || instr->m_opcode == Js::OpCode::Simd128_Select_U16); + + IR::Opnd* dst = nullptr; + IR::Opnd* src1 = nullptr; + IR::Opnd* src2 = nullptr; + IR::Opnd* src3 = nullptr; + SList *args = 
Simd128GetExtendedArgs(instr); + // The number of src opnds should be exact. Missing opnds means type-error, and we should generate an exception throw instead (or globopt does). + Assert(args->Count() == 4); + dst = args->Pop(); + src1 = args->Pop(); // mask + src2 = args->Pop(); // trueValue + src3 = args->Pop(); // falseValue + + Assert(dst->IsRegOpnd() && dst->IsSimd128()); + Assert(src1->IsRegOpnd() && src1->IsSimd128()); + Assert(src2->IsRegOpnd() && src2->IsSimd128()); + Assert(src3->IsRegOpnd() && src3->IsSimd128()); + + IR::RegOpnd *tmp = IR::RegOpnd::New(src1->GetType(), m_func); + IR::Instr *pInstr = nullptr; + // ANDPS tmp1, mask, tvalue + pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, src1, src2, m_func); + instr->InsertBefore(pInstr); + Legalize(pInstr); + // ANDPS dst, mask, fvalue + pInstr = IR::Instr::New(Js::OpCode::ANDNPS, dst, src1, src3, m_func); + instr->InsertBefore(pInstr); + Legalize(pInstr); + // ORPS dst, dst, tmp1 + pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, tmp, m_func); + instr->InsertBefore(pInstr); + + pInstr = instr->m_prev; + instr->Remove(); + return pInstr; +} +#endif + +#if defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) #define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)(irOpcode - Js::OpCode::Simd128_Start)] @@ -53,24 +494,18 @@ bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr) Assert(opcode == Js::OpCode::ANDPS); instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskF4Addr(), instr->GetSrc1()->GetType(), m_func)); break; -#if 0 case Js::OpCode::Simd128_Abs_D2: Assert(opcode == Js::OpCode::ANDPD); instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskD2Addr(), instr->GetSrc1()->GetType(), m_func)); break; -#endif // 0 - case Js::OpCode::Simd128_Neg_F4: Assert(opcode == Js::OpCode::XORPS); instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), instr->GetSrc1()->GetType(), m_func)); break; -#if 0 case 
Js::OpCode::Simd128_Neg_D2: Assert(opcode == Js::OpCode::XORPS); instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskD2Addr(), instr->GetSrc1()->GetType(), m_func)); break; -#endif // 0 - case Js::OpCode::Simd128_Not_I4: case Js::OpCode::Simd128_Not_I16: case Js::OpCode::Simd128_Not_I8: @@ -84,9 +519,9 @@ bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr) instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), instr->GetSrc1()->GetType(), m_func)); break; case Js::OpCode::Simd128_Gt_F4: - //case Js::OpCode::Simd128_Gt_D2: + case Js::OpCode::Simd128_Gt_D2: case Js::OpCode::Simd128_GtEq_F4: - //case Js::OpCode::Simd128_GtEq_D2: + case Js::OpCode::Simd128_GtEq_D2: case Js::OpCode::Simd128_Lt_I4: case Js::OpCode::Simd128_Lt_I8: case Js::OpCode::Simd128_Lt_I16: @@ -113,6 +548,20 @@ bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr) return true; } +IR::MemRefOpnd * +LowererMD::LoadSimdHelperArgument(IR::Instr * instr, uint8 index) +{ + //the most reliable way to pass a simd value on x86/x64 win/lnx across calls + //is to pass a pointer to a SIMD value in the simd temporary area. 
+ //otherwise we have to use __m128 and msvc intrinsics which may or may not be the same across + //MSVC and Clang + + IR::MemRefOpnd* srcMemRef = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), TySimd128F4, m_func); + IR::AddrOpnd* argAddress = IR::AddrOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), IR::AddrOpndKindDynamicMisc, m_func, true /* doesn't come from a user */); + LoadHelperArgument(instr, argAddress); + return srcMemRef; +} + IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr) { switch (instr->m_opcode) @@ -120,6 +569,7 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr) case Js::OpCode::Simd128_LdC: return Simd128LoadConst(instr); +#ifdef ENABLE_SIMD case Js::OpCode::Simd128_FloatsToF4: case Js::OpCode::Simd128_IntsToI4: case Js::OpCode::Simd128_IntsToU4: @@ -134,11 +584,31 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr) case Js::OpCode::Simd128_IntsToB16: return Simd128LowerConstructor_16(instr); + case Js::OpCode::Simd128_Rcp_F4: + //case Js::OpCode::Simd128_Rcp_D2: + return Simd128LowerRcp(instr); + //SQRT + case Js::OpCode::Simd128_RcpSqrt_F4: + //case Js::OpCode::Simd128_RcpSqrt_D2: + return Simd128LowerRcpSqrt(instr); + + case Js::OpCode::Simd128_Select_F4: + case Js::OpCode::Simd128_Select_I4: + //case Js::OpCode::Simd128_Select_D2: + case Js::OpCode::Simd128_Select_I8: + case Js::OpCode::Simd128_Select_I16: + case Js::OpCode::Simd128_Select_U4: + case Js::OpCode::Simd128_Select_U8: + case Js::OpCode::Simd128_Select_U16: + return Simd128LowerSelect(instr); +#endif + #if 0 case Js::OpCode::Simd128_DoublesToD2: return Simd128LowerConstructor_2(instr); #endif // 0 + case Js::OpCode::Simd128_ExtractLane_I2: case Js::OpCode::Simd128_ExtractLane_I4: case Js::OpCode::Simd128_ExtractLane_I8: case Js::OpCode::Simd128_ExtractLane_I16: @@ -151,6 +621,9 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr) case 
Js::OpCode::Simd128_ExtractLane_F4: return Simd128LowerLdLane(instr); + case Js::OpCode::Simd128_ReplaceLane_I2: + case Js::OpCode::Simd128_ReplaceLane_D2: + return SIMD128LowerReplaceLane_2(instr); case Js::OpCode::Simd128_ReplaceLane_I4: case Js::OpCode::Simd128_ReplaceLane_F4: case Js::OpCode::Simd128_ReplaceLane_U4: @@ -169,38 +642,21 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr) case Js::OpCode::Simd128_Splat_F4: case Js::OpCode::Simd128_Splat_I4: - //case Js::OpCode::Simd128_Splat_D2: + case Js::OpCode::Simd128_Splat_I2: + case Js::OpCode::Simd128_Splat_D2: case Js::OpCode::Simd128_Splat_I8: case Js::OpCode::Simd128_Splat_I16: case Js::OpCode::Simd128_Splat_U4: - case Js::OpCode::Simd128_Splat_U8: - case Js::OpCode::Simd128_Splat_U16: - case Js::OpCode::Simd128_Splat_B4: - case Js::OpCode::Simd128_Splat_B8: - case Js::OpCode::Simd128_Splat_B16: - return Simd128LowerSplat(instr); - - case Js::OpCode::Simd128_Rcp_F4: - //case Js::OpCode::Simd128_Rcp_D2: - return Simd128LowerRcp(instr); - - case Js::OpCode::Simd128_Sqrt_F4: - //case Js::OpCode::Simd128_Sqrt_D2: - return Simd128LowerSqrt(instr); - - case Js::OpCode::Simd128_RcpSqrt_F4: - //case Js::OpCode::Simd128_RcpSqrt_D2: - return Simd128LowerRcpSqrt(instr); - - case Js::OpCode::Simd128_Select_F4: - case Js::OpCode::Simd128_Select_I4: - //case Js::OpCode::Simd128_Select_D2: - case Js::OpCode::Simd128_Select_I8: - case Js::OpCode::Simd128_Select_I16: - case Js::OpCode::Simd128_Select_U4: - case Js::OpCode::Simd128_Select_U8: - case Js::OpCode::Simd128_Select_U16: - return Simd128LowerSelect(instr); + case Js::OpCode::Simd128_Splat_U8: + case Js::OpCode::Simd128_Splat_U16: + case Js::OpCode::Simd128_Splat_B4: + case Js::OpCode::Simd128_Splat_B8: + case Js::OpCode::Simd128_Splat_B16: + return Simd128LowerSplat(instr); + + case Js::OpCode::Simd128_Sqrt_F4: + //case Js::OpCode::Simd128_Sqrt_D2: + return Simd128LowerSqrt(instr); case Js::OpCode::Simd128_Neg_I4: case 
Js::OpCode::Simd128_Neg_I8: @@ -229,6 +685,9 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr) case Js::OpCode::Simd128_ShLtByScalar_U8: case Js::OpCode::Simd128_ShRtByScalar_U16: case Js::OpCode::Simd128_ShLtByScalar_U16: + case Js::OpCode::Simd128_ShLtByScalar_I2: + case Js::OpCode::Simd128_ShRtByScalar_U2: + case Js::OpCode::Simd128_ShRtByScalar_I2: return Simd128LowerShift(instr); case Js::OpCode::Simd128_LdArr_I4: @@ -313,6 +772,14 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr) case Js::OpCode::Simd128_FromFloat32x4_U4: return Simd128LowerUint32x4FromFloat32x4(instr); + case Js::OpCode::Simd128_FromInt64x2_D2: + return EmitSimdConversion(instr, IR::HelperSimd128ConvertSD2); + case Js::OpCode::Simd128_FromUint64x2_D2: + return EmitSimdConversion(instr, IR::HelperSimd128ConvertUD2); + case Js::OpCode::Simd128_FromFloat64x2_I2: + return EmitSimdConversion(instr, IR::HelperSimd128TruncateI2); + case Js::OpCode::Simd128_FromFloat64x2_U2: + return EmitSimdConversion(instr, IR::HelperSimd128TruncateU2); case Js::OpCode::Simd128_Neq_I4: case Js::OpCode::Simd128_Neq_I8: case Js::OpCode::Simd128_Neq_I16: @@ -348,22 +815,43 @@ IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr) case Js::OpCode::Simd128_Max_F4: return Simd128LowerMinMax_F4(instr); + case Js::OpCode::Simd128_AnyTrue_B2: case Js::OpCode::Simd128_AnyTrue_B4: case Js::OpCode::Simd128_AnyTrue_B8: case Js::OpCode::Simd128_AnyTrue_B16: return Simd128LowerAnyTrue(instr); + case Js::OpCode::Simd128_AllTrue_B2: case Js::OpCode::Simd128_AllTrue_B4: case Js::OpCode::Simd128_AllTrue_B8: case Js::OpCode::Simd128_AllTrue_B16: return Simd128LowerAllTrue(instr); - + case Js::OpCode::Simd128_BitSelect_I4: + return LowerSimd128BitSelect(instr); default: AssertMsg(UNREACHED, "Unsupported Simd128 instruction"); } return nullptr; } + +IR::Instr* LowererMD::LowerSimd128BitSelect(IR::Instr* instr) +{ + SList *args = Simd128GetExtendedArgs(instr); + IR::Opnd 
*dst = args->Pop(); + IR::Opnd *src1 = args->Pop(); + IR::Opnd *src2 = args->Pop(); + IR::Opnd *mask = args->Pop(); + + IR::Instr* pInstr = IR::Instr::New(Js::OpCode::PXOR, dst, src1, src2, m_func); + instr->InsertBefore(pInstr); + Legalize(pInstr); + + instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func)); + instr->InsertBefore(IR::Instr::New(Js::OpCode::PXOR, dst, dst, src2, m_func)); + return removeInstr(instr); +} + IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr) { Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC); @@ -375,7 +863,7 @@ IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr) AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value; // MOVUPS dst, [const] - + void *pValue = NativeCodeDataNewNoFixup(this->m_func->GetNativeCodeDataAllocator(), SIMDType, value); IR::Opnd * simdRef; if (!m_func->IsOOPJIT()) @@ -405,7 +893,10 @@ IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr) IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &cmpOpcode, IR::Opnd& dstOpnd) { Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16 || - instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16); + instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16 || + instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 || + instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 || instr->m_opcode == 
Js::OpCode::Simd128_AllTrue_B16 + ); IR::Instr *pInstr; //dst = cmpOpcode dst, X86_ALL_ZEROS pInstr = IR::Instr::New(cmpOpcode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func); @@ -418,256 +909,153 @@ IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpC return instr; } -IR::Instr* LowererMD::Simd128LowerConstructor_8(IR::Instr *instr) +IR::Instr* LowererMD::EmitSimdConversion(IR::Instr *instr, IR::JnHelperMethod helper) { - IR::Opnd* dst = nullptr; - IR::Opnd* srcs[8]; - - //Simd128_IntsToI8/U8/B8 - Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToI8 || instr->m_opcode == Js::OpCode::Simd128_IntsToU8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8); - SList *args = Simd128GetExtendedArgs(instr); + IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0); + IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1); + m_lowerer->InsertMove(srcMemRef, instr->UnlinkSrc1(), instr); - Assert(args->Count() == 9); - dst = args->Pop(); + IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func); + instr->InsertBefore(helperCall); + this->ChangeToHelperCall(helperCall, helper); - uint i = 0; - while (!args->Empty() && i < 8) - { - srcs[i] = args->Pop(); - // src's might have been constant prop'd. Enregister them if so. 
- srcs[i] = EnregisterIntConst(instr, srcs[i], TyInt16); - Assert(srcs[i]->GetType() == TyInt16 && srcs[i]->IsRegOpnd()); - // PINSRW dst, srcs[i], i - instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRW, dst, srcs[i], IR::IntConstOpnd::New(i, TyInt8, m_func, true), m_func)); - i++; - } - if (instr->m_opcode == Js::OpCode::Simd128_IntsToB8) - { - instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst); - } - IR::Instr* prevInstr; - prevInstr = instr->m_prev; - instr->Remove(); - return prevInstr; + m_lowerer->InsertMove(instr->UnlinkDst(), dstMemRef, instr); + return removeInstr(instr); } -IR::Instr* LowererMD::Simd128LowerConstructor_16(IR::Instr *instr) +void LowererMD::EmitShiftByScalarI2(IR::Instr *instr, IR::JnHelperMethod helper) { - IR::Opnd* dst = nullptr; - IR::Opnd* srcs[16]; - //Simd128_IntsToI16/U16/B16 - Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToU16 || instr->m_opcode == Js::OpCode::Simd128_IntsToI16 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16); - SList *args = Simd128GetExtendedArgs(instr); - intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0); -#if DBG - // using only one SIMD temp - intptr_t endAddrSIMD = tempSIMD + sizeof(X86SIMDValue); -#endif - intptr_t address; - IR::Instr * newInstr; - - Assert(args->Count() == 17); - dst = args->Pop(); - - uint i = 0; - while (!args->Empty() && i < 16) - { - srcs[i] = args->Pop(); - // src's might have been constant prop'd. Enregister them if so. 
- srcs[i] = EnregisterIntConst(instr, srcs[i], TyInt8); - Assert(srcs[i]->GetType() == TyInt8 && srcs[i]->IsRegOpnd()); - - address = tempSIMD + i; - // check for buffer overrun - Assert((intptr_t)address < endAddrSIMD); - // MOV [temp + i], src[i] (TyInt8) - newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(tempSIMD + i, TyInt8, m_func), srcs[i], m_func); - instr->InsertBefore(newInstr); - Legalize(newInstr); - i++; - } - // MOVUPS dst, [temp] - newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(tempSIMD, TySimd128U16, m_func), m_func); - instr->InsertBefore(newInstr); - Legalize(newInstr); - - if (instr->m_opcode == Js::OpCode::Simd128_IntsToB16) - { - instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst); - } - - IR::Instr* prevInstr; - prevInstr = instr->m_prev; - instr->Remove(); - return prevInstr; + IR::Opnd* src2 = instr->GetSrc2(); + IR::Opnd* dst = instr->GetDst(); + LoadHelperArgument(instr, src2); + IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0); + m_lowerer->InsertMove(srcMemRef, instr->GetSrc1(), instr); + IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1); + IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func); + instr->InsertBefore(helperCall); + this->ChangeToHelperCall(helperCall, helper); + m_lowerer->InsertMove(dst, dstMemRef, instr); } -IR::Instr* LowererMD::Simd128LowerConstructor_4(IR::Instr *instr) +IR::Instr * LowererMD::SIMD128LowerReplaceLane_2(IR::Instr *instr) { - IR::Opnd* dst = nullptr; - IR::Opnd* src1 = nullptr; - IR::Opnd* src2 = nullptr; - IR::Opnd* src3 = nullptr; - IR::Opnd* src4 = nullptr; - IR::Instr* newInstr = nullptr; - - Assert(instr->m_opcode == Js::OpCode::Simd128_FloatsToF4 || - instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || - instr->m_opcode == Js::OpCode::Simd128_IntsToI4 || - instr->m_opcode == Js::OpCode::Simd128_IntsToU4); - - // use MOVSS for both int32x4 and float32x4. MOVD zeroes upper bits. 
- Js::OpCode movOpcode = Js::OpCode::MOVSS; - Js::OpCode shiftOpcode = Js::OpCode::PSLLDQ; SList *args = Simd128GetExtendedArgs(instr); + IR::Opnd *dst = args->Pop(); + IR::Opnd *src1 = args->Pop(); + IR::Opnd *src2 = args->Pop(); + IR::Opnd *src3 = args->Pop(); - // The number of src opnds should be exact. If opnds are missing, they should be filled in by globopt during type-spec. - Assert(args->Count() == 5); - - dst = args->Pop(); - src1 = args->Pop(); - src2 = args->Pop(); - src3 = args->Pop(); - src4 = args->Pop(); + int lane = src2->AsIntConstOpnd()->AsInt32(); + Assert(dst->IsSimd128() && src1->IsSimd128()); - if (instr->m_opcode == Js::OpCode::Simd128_FloatsToF4) + if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_D2) { - // We don't have f32 type-spec, so we type-spec to f64 and convert to f32 before use. - if (src1->IsFloat64()) + AssertMsg(AutoSystemInfo::Data.SSE2Available(), "SSE2 not supported"); + Assert(src3->IsFloat64()); + m_lowerer->InsertMove(dst, src1, instr); + if (lane) { - IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); - // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 - newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func); - instr->InsertBefore(newInstr); - src1 = regOpnd32; - } - if (src2->IsFloat64()) - { - IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); - // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 - newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src2, this->m_func); - instr->InsertBefore(newInstr); - src2 = regOpnd32; - } - if (src3->IsFloat64()) - { - IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); - // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 - newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src3, this->m_func); - instr->InsertBefore(newInstr); - src3 = regOpnd32; + instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPD, dst, src3, 
IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func)); } - if (src4->IsFloat64()) + else { - IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); - // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 - newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src4, this->m_func); - instr->InsertBefore(newInstr); - src4 = regOpnd32; - } + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src3, m_func)); + } + return removeInstr(instr); + } - Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat32); - Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat32); - Assert(src3->IsRegOpnd() && src3->GetType() == TyFloat32); - Assert(src4->IsRegOpnd() && src4->GetType() == TyFloat32); + Assert(src3->IsInt64()); - // MOVSS dst, src4 - instr->InsertBefore(IR::Instr::New(movOpcode, dst, src4, m_func)); - // PSLLDQ dst, dst, 4 - instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func)); - // MOVSS dst, src3 - instr->InsertBefore(IR::Instr::New(movOpcode, dst, src3, m_func)); - // PSLLDQ dst, 4 - instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func)); - // MOVSS dst, src2 - instr->InsertBefore(IR::Instr::New(movOpcode, dst, src2, m_func)); - // PSLLDQ dst, 4 - instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func)); - // MOVSS dst, src1 - instr->InsertBefore(IR::Instr::New(movOpcode, dst, src1, m_func)); + if (AutoSystemInfo::Data.SSE4_1Available()) + { + m_lowerer->InsertMove(dst, src1, instr); + instr->SetDst(dst); + EmitInsertInt64(src3, lane, instr); } else { - //Simd128_IntsToI4/U4 - IR::RegOpnd *temp = IR::RegOpnd::New(TyFloat32, m_func); - - // src's might have been constant prop'd. Enregister them if so. 
- src4 = EnregisterIntConst(instr, src4); - src3 = EnregisterIntConst(instr, src3); - src2 = EnregisterIntConst(instr, src2); - src1 = EnregisterIntConst(instr, src1); - - Assert(src1->GetType() == TyInt32 && src1->IsRegOpnd()); - Assert(src2->GetType() == TyInt32 && src2->IsRegOpnd()); - Assert(src3->GetType() == TyInt32 && src3->IsRegOpnd()); - Assert(src4->GetType() == TyInt32 && src4->IsRegOpnd()); - - // MOVD t(TyFloat32), src4(TyInt32) - instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src4, m_func)); - - // MOVSS dst, t - instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func)); - // PSLLDQ dst, dst, 4 - instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func)); - - // MOVD t(TyFloat32), sr34(TyInt32) - instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src3, m_func)); - // MOVSS dst, t - instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func)); - // PSLLDQ dst, dst, 4 - instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func)); - - // MOVD t(TyFloat32), src2(TyInt32) - instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src2, m_func)); - // MOVSS dst, t - instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func)); - // PSLLDQ dst, dst, 4 - instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func)); - - // MOVD t(TyFloat32), src1(TyInt32) - instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src1, m_func)); - // MOVSS dst, t - instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func)); - - if (instr->m_opcode == Js::OpCode::Simd128_IntsToB4) - { - instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst); - } - } - - IR::Instr* prevInstr; - prevInstr = instr->m_prev; - instr->Remove(); - return prevInstr; + LoadHelperArgument(instr, src2); + LoadInt64HelperArgument(instr, 
src3); + IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0); + m_lowerer->InsertMove(srcMemRef, src1, instr); + IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1); + IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func); + instr->InsertBefore(helperCall); + this->ChangeToHelperCall(helperCall, IR::HelperSimd128ReplaceLaneI2); + m_lowerer->InsertMove(dst, dstMemRef, instr); + } + return removeInstr(instr); } -#if 0 -IR::Instr *LowererMD::Simd128LowerConstructor_2(IR::Instr *instr) -{ - IR::Opnd* dst = nullptr; - IR::Opnd* src1 = nullptr; - IR::Opnd* src2 = nullptr; - - Assert(instr->m_opcode == Js::OpCode::Simd128_DoublesToD2); - dst = instr->GetDst(); - src1 = instr->GetSrc1(); - src2 = instr->GetSrc2(); +void LowererMD::EmitInsertInt64(IR::Opnd* src, uint index, IR::Instr *instr) +{ + IR::Opnd* dst = instr->GetDst(); + Assert(dst->IsSimd128() && src->IsInt64()); - Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat64); - Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat64); - // MOVSD dst, src2 - instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src2, m_func)); - // PSLLDQ dst, dst, 8 - instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, dst, dst, IR::IntConstOpnd::New(TySize[TyFloat64], TyInt8, m_func, true), m_func)); - // MOVSD dst, src1 - instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func)); - Assert(dst->IsRegOpnd() && dst->IsSimd128()); - IR::Instr* prevInstr; - prevInstr = instr->m_prev; - instr->Remove(); - return prevInstr; + if (AutoSystemInfo::Data.SSE4_1Available()) + { +#ifdef _M_IX86 + index *= 2; + Int64RegPair srcPair = m_func->FindOrCreateInt64Pair(src); + instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.low, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func)); + instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.high, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func)); +#else + 
instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func)); +#endif +} + else + { + intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0); +#ifdef _M_IX86 + Int64RegPair src1Pair = m_func->FindOrCreateInt64Pair(src); + IR::Opnd* lower = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func); + m_lowerer->InsertMove(lower, src1Pair.low, instr); + IR::Opnd* higher = IR::MemRefOpnd::New(tempSIMD + 4, TyMachPtr, m_func); + m_lowerer->InsertMove(higher, src1Pair.high, instr); +#else + IR::Opnd* mem = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func); + m_lowerer->InsertMove(mem, src, instr); +#endif + + IR::MemRefOpnd* tmp = IR::MemRefOpnd::New(tempSIMD, TyFloat64, m_func); + Js::OpCode opcode = (index) ? Js::OpCode::MOVHPD : Js::OpCode::MOVLPD; + IR::Instr* newInstr = IR::Instr::New(opcode, dst, tmp, m_func); + instr->InsertBefore(newInstr); + newInstr->HoistMemRefAddress(tmp, Js::OpCode::MOV); + Legalize(newInstr); + } } + +void LowererMD::EmitExtractInt64(IR::Opnd* dst, IR::Opnd* src, uint index, IR::Instr *instr) +{ + Assert(index == 0 || index == 1); + Assert(dst->IsInt64() && src->IsSimd128()); + if (AutoSystemInfo::Data.SSE4_1Available()) + { +#ifdef _M_IX86 + index *= 2; + Int64RegPair dstPair = m_func->FindOrCreateInt64Pair(dst); + instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.low, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func)); + instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.high, src, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func)); +#else + instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func)); #endif + } + else + { + IR::Opnd* tmp = src; + if (index) + { + tmp = IR::RegOpnd::New(TySimd128F4, m_func); + instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, tmp, src, IR::IntConstOpnd::New(2 | 3 << 2, TyInt8, m_func, true), m_func)); + } + 
//kludg-ish; we need a new instruction for LowerReinterpretPrimitive to transform + //and dummy one for a caller to remove + IR::Instr* tmpInstr = IR::Instr::New(Js::OpCode::Simd128_ExtractLane_I2, dst, tmp->UseWithNewType(TyFloat64, m_func), m_func); + instr->InsertBefore(tmpInstr); + m_lowerer->LowerReinterpretPrimitive(tmpInstr); + } +} IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr) { @@ -679,14 +1067,18 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr) src1 = instr->GetSrc1(); src2 = instr->GetSrc2(); - Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyUint32 || dst->GetType() == TyFloat64)); + Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyUint32 || dst->GetType() == TyFloat64 || dst->IsInt64())); Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128()); Assert(src2 && src2->IsIntConstOpnd()); + laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32(); laneWidth = 4; switch (instr->m_opcode) { + case Js::OpCode::Simd128_ExtractLane_I2: + laneWidth = 8; + break; case Js::OpCode::Simd128_ExtractLane_F4: movOpcode = Js::OpCode::MOVSS; Assert(laneIndex < 4); @@ -721,6 +1113,11 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr) Assert(UNREACHED); } + if (laneWidth == 8) //Simd128_ExtractLane_I2 + { + EmitExtractInt64(dst, instr->GetSrc1(), laneIndex, instr); + } + else { IR::Opnd* tmp = src1; if (laneIndex != 0) @@ -737,7 +1134,7 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr) // dst has the 4-byte lane if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 || - instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U16|| instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16|| instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16) + instr->m_opcode == 
Js::OpCode::Simd128_ExtractLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16) { // extract the 1/2 bytes sublane IR::Instr *newInstr = nullptr; @@ -777,13 +1174,13 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr) if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16) { - IR::Instr* pInstr = nullptr; + IR::Instr* pInstr = nullptr; IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func); - // cmp dst, -1 + // cmp dst, 0 pInstr = IR::Instr::New(Js::OpCode::CMP, m_func); pInstr->SetSrc1(dst->UseWithNewType(laneType, m_func)); - pInstr->SetSrc2(IR::IntConstOpnd::New(-1, laneType, m_func, true)); + pInstr->SetSrc2(IR::IntConstOpnd::New(0, laneType, m_func, true)); instr->InsertBefore(pInstr); Legalize(pInstr); @@ -792,8 +1189,8 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr) instr->InsertBefore(pInstr); Legalize(pInstr); - // sete tmp(TyInt8) - pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func); + // setne tmp(TyInt8) + pInstr = IR::Instr::New(Js::OpCode::SETNE, tmp, tmp, m_func); instr->InsertBefore(pInstr); Legalize(pInstr); @@ -818,7 +1215,7 @@ IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr) Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64 || src1->GetType() == TyInt16 || src1->GetType() == TyInt8 || src1->GetType() == TyUint16 || - src1->GetType() == TyUint8 || src1->GetType() == TyUint32)); + src1->GetType() == TyUint8 || src1->GetType() == TyUint32 || src1->IsInt64())); Assert(!instr->GetSrc2()); @@ -838,12 +1235,17 @@ IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr) shufOpCode = Js::OpCode::PSHUFD; movOpCode = Js::OpCode::MOVD; break; -#if 0 case Js::OpCode::Simd128_Splat_D2: shufOpCode = Js::OpCode::SHUFPD; movOpCode = Js::OpCode::MOVSD; break; 
-#endif // 0 + case Js::OpCode::Simd128_Splat_I2: + { + EmitInsertInt64(src1, 0, instr); + instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(68, TyInt8, m_func, true), m_func)); + bSkip = true; + break; + } case Js::OpCode::Simd128_Splat_I8: case Js::OpCode::Simd128_Splat_U8: @@ -930,44 +1332,6 @@ IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr) return prevInstr; } -IR::Instr* LowererMD::Simd128LowerRcp(IR::Instr *instr, bool removeInstr) -{ - Js::OpCode opcode = Js::OpCode::DIVPS; - IR::Opnd *dst, *src1; - dst = instr->GetDst(); - src1 = instr->GetSrc1(); - - Assert(dst && dst->IsRegOpnd()); - Assert(src1 && src1->IsRegOpnd()); - Assert(instr->GetSrc2() == nullptr); - Assert(src1->IsSimd128F4() || src1->IsSimd128I4()); - opcode = Js::OpCode::DIVPS; - -#if 0 - { - Assert(instr->m_opcode == Js::OpCode::Simd128_Rcp_D2 || instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2); - Assert(src1->IsSimd128D2()); - opcode = Js::OpCode::DIVPD; - x86_allones_mask = (void*)(&X86_ALL_ONES_D2); - } -#endif // 0 - - IR::RegOpnd* tmp = IR::RegOpnd::New(src1->GetType(), m_func); - IR::Instr* movInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllOnesF4Addr(), src1->GetType(), m_func), m_func); - instr->InsertBefore(movInstr); - Legalize(movInstr); - - instr->InsertBefore(IR::Instr::New(opcode, tmp, tmp, src1, m_func)); - instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, tmp, m_func)); - if (removeInstr) - { - IR::Instr* prevInstr = instr->m_prev; - instr->Remove(); - return prevInstr; - } - return instr; -} - IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr) { Js::OpCode opcode = Js::OpCode::SQRTPS; @@ -993,69 +1357,6 @@ IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr) return prevInstr; } -IR::Instr* LowererMD::Simd128LowerRcpSqrt(IR::Instr *instr) -{ - Js::OpCode opcode = Js::OpCode::SQRTPS; - Simd128LowerRcp(instr, false); - - opcode = Js::OpCode::SQRTPS; - 
-#if 0 - else - { - Assert(instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2); - opcode = Js::OpCode::SQRTPD; - } -#endif // 0 - - instr->InsertBefore(IR::Instr::New(opcode, instr->GetDst(), instr->GetDst(), m_func)); - IR::Instr* prevInstr = instr->m_prev; - instr->Remove(); - return prevInstr; -} - -IR::Instr* LowererMD::Simd128LowerSelect(IR::Instr *instr) -{ - Assert(instr->m_opcode == Js::OpCode::Simd128_Select_F4 || instr->m_opcode == Js::OpCode::Simd128_Select_I4 /*|| instr->m_opcode == Js::OpCode::Simd128_Select_D2 */|| - instr->m_opcode == Js::OpCode::Simd128_Select_I8 || instr->m_opcode == Js::OpCode::Simd128_Select_I16 || instr->m_opcode == Js::OpCode::Simd128_Select_U4 || - instr->m_opcode == Js::OpCode::Simd128_Select_U8 || instr->m_opcode == Js::OpCode::Simd128_Select_U16 ); - - IR::Opnd* dst = nullptr; - IR::Opnd* src1 = nullptr; - IR::Opnd* src2 = nullptr; - IR::Opnd* src3 = nullptr; - SList *args = Simd128GetExtendedArgs(instr); - // The number of src opnds should be exact. Missing opnds means type-error, and we should generate an exception throw instead (or globopt does). 
- Assert(args->Count() == 4); - dst = args->Pop(); - src1 = args->Pop(); // mask - src2 = args->Pop(); // trueValue - src3 = args->Pop(); // falseValue - - Assert(dst->IsRegOpnd() && dst->IsSimd128()); - Assert(src1->IsRegOpnd() && src1->IsSimd128()); - Assert(src2->IsRegOpnd() && src2->IsSimd128()); - Assert(src3->IsRegOpnd() && src3->IsSimd128()); - - IR::RegOpnd *tmp = IR::RegOpnd::New(src1->GetType(), m_func); - IR::Instr *pInstr = nullptr; - // ANDPS tmp1, mask, tvalue - pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, src1, src2, m_func); - instr->InsertBefore(pInstr); - Legalize(pInstr); - // ANDPS dst, mask, fvalue - pInstr = IR::Instr::New(Js::OpCode::ANDNPS, dst, src1, src3, m_func); - instr->InsertBefore(pInstr); - Legalize(pInstr); - // ORPS dst, dst, tmp1 - pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, tmp, m_func); - instr->InsertBefore(pInstr); - - pInstr = instr->m_prev; - instr->Remove(); - return pInstr; -} - IR::Instr* LowererMD::Simd128LowerNeg(IR::Instr *instr) { @@ -1238,6 +1539,17 @@ IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr) switch (instr->m_opcode) { + case Js::OpCode::Simd128_ShRtByScalar_I2: + EmitShiftByScalarI2(instr, IR::HelperSimd128ShRtByScalarI2); + return removeInstr(instr); + case Js::OpCode::Simd128_ShLtByScalar_I2: + opcode = Js::OpCode::PSLLQ; + elementSizeInBytes = 8; + break; + case Js::OpCode::Simd128_ShRtByScalar_U2: + opcode = Js::OpCode::PSRLQ; + elementSizeInBytes = 8; + break; case Js::OpCode::Simd128_ShLtByScalar_I4: case Js::OpCode::Simd128_ShLtByScalar_U4: // same as int32x4.ShiftLeftScalar opcode = Js::OpCode::PSLLD; @@ -1284,7 +1596,7 @@ IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr) //Shift amount: The shift amout is masked by [ElementSize] * 8 //The masked Shift amount is moved to xmm register //AND shamt, shmask, shamt - //MOVD tmp0, shamt + //MOVD tmp0, shamt IR::RegOpnd *shamt = IR::RegOpnd::New(src2->GetType(), m_func); // en-register @@ -1300,7 +1612,8 @@ IR::Instr* 
LowererMD::Simd128LowerShift(IR::Instr *instr) if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I4 || instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U4 || instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I8 || - instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U8) + instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U8 || + instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I2 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U2) { // shiftOpCode dst, src1, tmp0 pInstr = IR::Instr::New(opcode, dst, src1, tmp0, m_func); @@ -2221,12 +2534,12 @@ IR::Instr* LowererMD::Simd128LowerMinMax_F4(IR::Instr* instr) instr->InsertBefore(pInstr); Legalize(pInstr); } - else + else { //This sequence closely mirrors SIMDFloat32x4Operation::OpMax except for //the fact that tmp2 (tmpbValue) is reused to reduce the number of registers - //needed for this sequence. + //needed for this sequence. 
pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp1, src1, src2, m_func); instr->InsertBefore(pInstr); @@ -2255,16 +2568,58 @@ IR::Instr* LowererMD::Simd128LowerMinMax_F4(IR::Instr* instr) } + +#ifdef ENABLE_WASM_SIMD +IR::Opnd* LowererMD::Simd128CanonicalizeToBoolsBeforeReduction(IR::Instr* instr) +{ + IR::Opnd* src1 = instr->GetSrc1(); + if (m_func->GetJITFunctionBody()->IsWasmFunction()) + { + Js::OpCode cmpOpcode = Js::OpCode::InvalidOpCode; + switch (instr->m_opcode) + { + case Js::OpCode::Simd128_AnyTrue_B4: + case Js::OpCode::Simd128_AnyTrue_B2: + case Js::OpCode::Simd128_AllTrue_B4: + case Js::OpCode::Simd128_AllTrue_B2: + cmpOpcode = Js::OpCode::PCMPEQD; + break; + case Js::OpCode::Simd128_AnyTrue_B8: + case Js::OpCode::Simd128_AllTrue_B8: + cmpOpcode = Js::OpCode::PCMPEQW; + break; + case Js::OpCode::Simd128_AnyTrue_B16: + case Js::OpCode::Simd128_AllTrue_B16: + cmpOpcode = Js::OpCode::PCMPEQB; + break; + default: + Assert(UNREACHED); + } + + IR::RegOpnd * newSrc = IR::RegOpnd::New(src1->GetType(), m_func); + m_lowerer->InsertMove(newSrc, src1, instr); + Simd128CanonicalizeToBools(instr, cmpOpcode, *newSrc); + return newSrc; + } + return src1; +} +#endif + IR::Instr* LowererMD::Simd128LowerAnyTrue(IR::Instr* instr) { Assert(instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 || - instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16); + instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2); IR::Instr *pInstr; IR::Opnd* dst = instr->GetDst(); +#ifdef ENABLE_WASM_SIMD + IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr); +#else IR::Opnd* src1 = instr->GetSrc1(); +#endif Assert(dst->IsRegOpnd() && dst->IsInt32()); Assert(src1->IsRegOpnd() && src1->IsSimd128()); + // pmovmskb dst, src1 // neg dst // sbb dst, dst @@ -2299,11 +2654,15 @@ IR::Instr* LowererMD::Simd128LowerAnyTrue(IR::Instr* instr) IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* 
instr) { Assert(instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 || - instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16); + instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2); IR::Instr *pInstr; IR::Opnd* dst = instr->GetDst(); +#ifdef ENABLE_WASM_SIMD + IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr); +#else IR::Opnd* src1 = instr->GetSrc1(); +#endif Assert(dst->IsRegOpnd() && dst->IsInt32()); Assert(src1->IsRegOpnd() && src1->IsSimd128()); @@ -2314,15 +2673,35 @@ IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* instr) pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func); instr->InsertBefore(pInstr); - // cmp dst, 0FFFFh + //horizontally OR into 0th and 2nd positions + //TODO nikolayk revisit the sequence for in64x2.alltrue + IR::Opnd* newDst = dst; + uint cmpMask = 0xFFFF; + if (instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2) + { + cmpMask = 0x0F0F; + IR::RegOpnd* reduceReg = IR::RegOpnd::New(TyInt32, m_func); + pInstr = IR::Instr::New(Js::OpCode::SHR, reduceReg, dst, (IR::IntConstOpnd::New(4, TyInt32, m_func, true)), m_func); + instr->InsertBefore(pInstr); + Legalize(pInstr); + pInstr = IR::Instr::New(Js::OpCode::OR, reduceReg, reduceReg, dst, m_func); + instr->InsertBefore(pInstr); + Legalize(pInstr); + pInstr = IR::Instr::New(Js::OpCode::AND, reduceReg, reduceReg, (IR::IntConstOpnd::New(0x0F0F, TyInt32, m_func, true)), m_func); + instr->InsertBefore(pInstr); + Legalize(pInstr); + newDst = reduceReg; + } + + // cmp dst, cmpMask pInstr = IR::Instr::New(Js::OpCode::CMP, m_func); - pInstr->SetSrc1(dst); - pInstr->SetSrc2(IR::IntConstOpnd::New(0x0FFFF, TyInt32, m_func, true)); + pInstr->SetSrc1(newDst); + pInstr->SetSrc2(IR::IntConstOpnd::New(cmpMask, TyInt32, m_func, true)); instr->InsertBefore(pInstr); Legalize(pInstr); // mov tmp(TyInt8), dst - pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func); + pInstr = 
IR::Instr::New(Js::OpCode::MOV, tmp, newDst, m_func); instr->InsertBefore(pInstr); Legalize(pInstr); @@ -2385,8 +2764,15 @@ IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr) // CMPLTPS tmp, src, tmp2 // MOVMSKPS mask2, tmp // OR mask1, mask1, mask2 + // check for NaNs + // CMPEQPS tmp, src + // MOVMSKPS mask2, tmp + // NOT mask2 + // AND mask2, 0x00000F + // OR mask1, mask2 + // // CMP mask1, 0 - // JNE $doneLabel + // JEQ $doneLabel newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128I4, m_func), m_func); insertInstr->InsertBefore(newInstr); Legalize(newInstr); @@ -2398,7 +2784,6 @@ IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr) newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegTwoPower31F4Addr(), TySimd128I4, m_func), m_func); insertInstr->InsertBefore(newInstr); Legalize(newInstr); - newInstr = IR::Instr::New(Js::OpCode::CMPLTPS, tmp, src, tmp2, m_func); insertInstr->InsertBefore(newInstr); Legalize(newInstr); @@ -2406,6 +2791,22 @@ IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr) insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func)); insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func)); + +#ifdef ENABLE_WASM_SIMD + if (m_func->GetJITFunctionBody()->IsWasmFunction()) + { + newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func); + insertInstr->InsertBefore(newInstr); + Legalize(newInstr); + insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func)); + insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::NOT, mask2, mask2, m_func)); + newInstr = IR::Instr::New(Js::OpCode::AND, mask2, mask2, IR::IntConstOpnd::New(0x00000F, TyInt32, m_func), m_func); + insertInstr->InsertBefore(newInstr); + Legalize(newInstr); + 
insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func)); + } +#endif + newInstr = IR::Instr::New(Js::OpCode::CMP, m_func); newInstr->SetSrc1(mask1); newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func)); @@ -2438,6 +2839,28 @@ IR::Instr* LowererMD::Simd128LowerUint32x4FromFloat32x4(IR::Instr *instr) two_31_i4_mask = IR::RegOpnd::New(TySimd128I4, m_func); tmp = IR::RegOpnd::New(TySimd128F4, m_func); tmp2 = IR::RegOpnd::New(TySimd128F4, m_func); + + // check for NaNs + // CMPEQPS tmp, src + // MOVMSKPS mask2, tmp + // AND mask2, 0x00000F + // JNE throw +#ifdef ENABLE_WASM_SIMD + if (m_func->GetJITFunctionBody()->IsWasmFunction()) + { + newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func); + instr->InsertBefore(newInstr); + Legalize(newInstr); + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func)); + newInstr = IR::Instr::New(Js::OpCode::CMP, m_func); + newInstr->SetSrc1(mask); + newInstr->SetSrc2(IR::IntConstOpnd::New(0x0000000F, TyInt32, m_func)); + instr->InsertBefore(newInstr); + Legalize(newInstr); + instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func)); + } +#endif + // any lanes <= -1.0 ? // CMPLEPS tmp, src, [X86_ALL_FLOAT32_NEG_ONES] // MOVMSKPS mask, tmp @@ -2457,6 +2880,7 @@ IR::Instr* LowererMD::Simd128LowerUint32x4FromFloat32x4(IR::Instr *instr) instr->InsertBefore(newInstr); Legalize(newInstr); + instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func)); // CVTTPS2DQ does a range check over signed range [-2^31, 2^31-1], so will fail to convert values >= 2^31. @@ -2638,7 +3062,6 @@ IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr) // Case (1) requires static bound check. Case (2) means we are always in bound. 
// this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant - if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32())) { m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError); @@ -3017,6 +3440,34 @@ SList * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr) return args; } + + +IR::Opnd* +LowererMD::EnregisterBoolConst(IR::Instr* instr, IR::Opnd *opnd, IRType type) +{ + + if (opnd->IsIntConstOpnd() || opnd->IsInt64ConstOpnd()) + { + bool isSet = opnd->GetImmediateValue(instr->m_func) != 0; + IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func); + instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(isSet ? -1 : 0, type, m_func, true), m_func)); + return tempReg; + } + + IRType origType = opnd->GetType(); + IR::RegOpnd *tempReg = IR::RegOpnd::New(origType, m_func); + IR::Instr* cmovInstr = IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(0, origType, m_func, true), m_func); + instr->InsertBefore(cmovInstr); + Legalize(cmovInstr); + cmovInstr = IR::Instr::New(Js::OpCode::SUB, tempReg, tempReg, opnd->UseWithNewType(origType, m_func), m_func); + instr->InsertBefore(cmovInstr); + Legalize(cmovInstr); + cmovInstr = IR::Instr::New(Js::OpCode::CMOVS, tempReg, tempReg, IR::IntConstOpnd::New(-1, origType, m_func, true), m_func); + instr->InsertBefore(cmovInstr); + Legalize(cmovInstr); + return tempReg->UseWithNewType(type, m_func); +} + IR::Opnd* LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type /* = TyInt32*/) { @@ -3133,28 +3584,27 @@ void LowererMD::Simd128InitOpcodeMap() SET_SIMDOPCODE(Simd128_Neq_F4 , CMPNEQPS); // CMPNEQPS SET_SIMDOPCODE(Simd128_Gt_F4 , CMPLTPS); // CMPLTPS (swap srcs) SET_SIMDOPCODE(Simd128_GtEq_F4 , CMPLEPS); // CMPLEPS (swap srcs) - + SET_SIMDOPCODE(Simd128_Neg_D2 , XORPS); + 
SET_SIMDOPCODE(Simd128_Add_D2 , ADDPD); + SET_SIMDOPCODE(Simd128_Abs_D2 , ANDPD); + SET_SIMDOPCODE(Simd128_Sub_D2 , SUBPD); + SET_SIMDOPCODE(Simd128_Mul_D2 , MULPD); + SET_SIMDOPCODE(Simd128_Div_D2 , DIVPD); + SET_SIMDOPCODE(Simd128_Min_D2 , MINPD); + SET_SIMDOPCODE(Simd128_Max_D2 , MAXPD); + SET_SIMDOPCODE(Simd128_Sqrt_D2 , SQRTPD); + SET_SIMDOPCODE(Simd128_Lt_D2 , CMPLTPD); // CMPLTPD + SET_SIMDOPCODE(Simd128_LtEq_D2 , CMPLEPD); // CMPLEPD + SET_SIMDOPCODE(Simd128_Eq_D2 , CMPEQPD); // CMPEQPD + SET_SIMDOPCODE(Simd128_Neq_D2 , CMPNEQPD); // CMPNEQPD + SET_SIMDOPCODE(Simd128_Gt_D2 , CMPLTPD); // CMPLTPD (swap srcs) + SET_SIMDOPCODE(Simd128_GtEq_D2 , CMPLEPD); // CMPLEPD (swap srcs) #if 0 SET_SIMDOPCODE(Simd128_FromFloat32x4_D2, CVTPS2PD); SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2, MOVAPS); SET_SIMDOPCODE(Simd128_FromInt32x4_D2, CVTDQ2PD); SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2, MOVAPS); - SET_SIMDOPCODE(Simd128_Neg_D2, XORPS); - SET_SIMDOPCODE(Simd128_Add_D2, ADDPD); - SET_SIMDOPCODE(Simd128_Abs_D2, ANDPD); - SET_SIMDOPCODE(Simd128_Sub_D2, SUBPD); - SET_SIMDOPCODE(Simd128_Mul_D2, MULPD); - SET_SIMDOPCODE(Simd128_Div_D2, DIVPD); - SET_SIMDOPCODE(Simd128_Min_D2, MINPD); - SET_SIMDOPCODE(Simd128_Max_D2, MAXPD); - SET_SIMDOPCODE(Simd128_Sqrt_D2, SQRTPD); - SET_SIMDOPCODE(Simd128_Lt_D2, CMPLTPD); // CMPLTPD - SET_SIMDOPCODE(Simd128_LtEq_D2, CMPLEPD); // CMPLEPD - SET_SIMDOPCODE(Simd128_Eq_D2, CMPEQPD); // CMPEQPD - SET_SIMDOPCODE(Simd128_Neq_D2, CMPNEQPD); // CMPNEQPD - SET_SIMDOPCODE(Simd128_Gt_D2, CMPLTPD); // CMPLTPD (swap srcs) - SET_SIMDOPCODE(Simd128_GtEq_D2, CMPLEPD); // CMPLEPD (swap srcs) #endif // 0 SET_SIMDOPCODE(Simd128_And_I8 , PAND); @@ -3217,83 +3667,13 @@ void LowererMD::Simd128InitOpcodeMap() SET_SIMDOPCODE(Simd128_Or_B16 , POR); SET_SIMDOPCODE(Simd128_Xor_B16 , XORPS); SET_SIMDOPCODE(Simd128_Not_B16 , XORPS); + + SET_SIMDOPCODE(Simd128_Add_I2 , PADDQ); + SET_SIMDOPCODE(Simd128_Sub_I2 , PSUBQ); } #undef SIMD_SETOPCODE #undef SIMD_GETOPCODE -// 
FromVar -void -LowererMD::GenerateCheckedSimdLoad(IR::Instr * instr) -{ - Assert(instr->m_opcode == Js::OpCode::FromVar); - Assert(instr->GetSrc1()->GetType() == TyVar); - Assert(IRType_IsSimd128(instr->GetDst()->GetType())); - - bool checkRequired = instr->HasBailOutInfo(); - IR::LabelInstr * labelHelper = nullptr, * labelDone = nullptr; - IR::Instr * insertInstr = instr, * newInstr; - IR::RegOpnd * src = instr->GetSrc1()->AsRegOpnd(), * dst = instr->GetDst()->AsRegOpnd(); - Assert(!checkRequired || instr->GetBailOutKind() == IR::BailOutSimd128F4Only || instr->GetBailOutKind() == IR::BailOutSimd128I4Only); - - if (checkRequired) - { - labelHelper = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true); - labelDone = IR::LabelInstr::New(Js::OpCode::Label, this->m_func); - instr->InsertBefore(labelHelper); - instr->InsertAfter(labelDone); - insertInstr = labelHelper; - - GenerateObjectTest(instr->GetSrc1(), insertInstr, labelHelper); - - newInstr = IR::Instr::New(Js::OpCode::CMP, instr->m_func); - newInstr->SetSrc1(IR::IndirOpnd::New(instr->GetSrc1()->AsRegOpnd(), 0, TyMachPtr, instr->m_func)); - newInstr->SetSrc2(m_lowerer->LoadVTableValueOpnd(instr, dst->GetType() == TySimd128F4 ? VTableValue::VtableSimd128F4 : VTableValue::VtableSimd128I4)); - insertInstr->InsertBefore(newInstr); - Legalize(newInstr); - insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, labelHelper, this->m_func)); - instr->UnlinkSrc1(); - instr->UnlinkDst(); - this->m_lowerer->GenerateBailOut(instr); - - } - size_t valueOffset = dst->GetType() == TySimd128F4 ? 
Js::JavascriptSIMDFloat32x4::GetOffsetOfValue() : Js::JavascriptSIMDInt32x4::GetOffsetOfValue(); - Assert(valueOffset < INT_MAX); - newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::IndirOpnd::New(src, static_cast(valueOffset), dst->GetType(), this->m_func), this->m_func); - insertInstr->InsertBefore(newInstr); - - insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, this->m_func)); - // FromVar is converted to BailOut call. Don't remove. -} - -// ToVar -void LowererMD::GenerateSimdStore(IR::Instr * instr) -{ - IR::RegOpnd *dst, *src; - IRType type; - dst = instr->GetDst()->AsRegOpnd(); - src = instr->GetSrc1()->AsRegOpnd(); - type = src->GetType(); - - this->m_lowerer->LoadScriptContext(instr); - IR::Instr * instrCall = IR::Instr::New(Js::OpCode::CALL, instr->GetDst(), - IR::HelperCallOpnd::New(type == TySimd128F4 ? IR::HelperAllocUninitializedSimdF4 : IR::HelperAllocUninitializedSimdI4, this->m_func), this->m_func); - instr->InsertBefore(instrCall); - this->lowererMDArch.LowerCall(instrCall, 0); - - IR::Opnd * valDst; - if (type == TySimd128F4) - { - valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDFloat32x4::GetOffsetOfValue(), TySimd128F4, this->m_func); - } - else - { - valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDInt32x4::GetOffsetOfValue(), TySimd128I4, this->m_func); - } - - instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVUPS, valDst, src, this->m_func)); - instr->Remove(); -} - void LowererMD::CheckShuffleLanes_4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2) { Assert(lanes); diff --git a/lib/Backend/Opnd.h b/lib/Backend/Opnd.h index 0e3f5b82193..123040f4d43 100644 --- a/lib/Backend/Opnd.h +++ b/lib/Backend/Opnd.h @@ -244,6 +244,7 @@ class Opnd bool IsSimd128B8() const { return this->m_type == TySimd128B8; } bool IsSimd128B16() const { return this->m_type == TySimd128B16; } bool IsSimd128D2() const { return this->m_type == TySimd128D2; } + bool IsSimd128I2() const { return this->m_type == 
TySimd128I2; } bool IsVar() const { return this->m_type == TyVar; } bool IsTaggedInt() const; bool IsTaggedValue() const; diff --git a/lib/Backend/SccLiveness.cpp b/lib/Backend/SccLiveness.cpp index 39afbaa8dcc..06cb673b829 100644 --- a/lib/Backend/SccLiveness.cpp +++ b/lib/Backend/SccLiveness.cpp @@ -402,6 +402,7 @@ SCCLiveness::ProcessSrc(IR::Opnd *src, IR::Instr *instr) lifetime = this->InsertLifetime(stackSym, reg, this->func->m_headInstr->m_next); lifetime->region = this->curRegion; lifetime->isFloat = symOpnd->IsFloat() || symOpnd->IsSimd128(); + } IR::RegOpnd * newRegOpnd = IR::RegOpnd::New(stackSym, reg, symOpnd->GetType(), this->func); diff --git a/lib/Backend/ServerThreadContext.cpp b/lib/Backend/ServerThreadContext.cpp index 7bd9985af5b..7639b6f9d8b 100644 --- a/lib/Backend/ServerThreadContext.cpp +++ b/lib/Backend/ServerThreadContext.cpp @@ -82,7 +82,7 @@ ServerThreadContext::GetImplicitCallFlagsAddr() const return static_cast(m_threadContextData.implicitCallFlagsAddr); } -#if defined(ENABLE_SIMDJS) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD)) && (defined(_M_IX86) || defined(_M_X64)) intptr_t ServerThreadContext::GetSimdTempAreaAddr(uint8 tempIndex) const { diff --git a/lib/Backend/ServerThreadContext.h b/lib/Backend/ServerThreadContext.h index b909b1910de..5b42325da40 100644 --- a/lib/Backend/ServerThreadContext.h +++ b/lib/Backend/ServerThreadContext.h @@ -22,7 +22,7 @@ class ServerThreadContext : public ThreadContextInfo virtual intptr_t GetThreadStackLimitAddr() const override; -#if defined(ENABLE_SIMDJS) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD)) && (defined(_M_IX86) || defined(_M_X64)) virtual intptr_t GetSimdTempAreaAddr(uint8 tempIndex) const override; #endif diff --git a/lib/Backend/Sym.h b/lib/Backend/Sym.h index 371afcd5f77..63e6c8fade3 100644 --- a/lib/Backend/Sym.h +++ b/lib/Backend/Sym.h @@ -151,6 +151,7 @@ class StackSym: public 
Sym bool IsSimd128B8() const { return this->GetType() == TySimd128B8; } bool IsSimd128B16() const { return this->GetType() == TySimd128B16; } bool IsSimd128D2() const { return this->GetType() == TySimd128D2; } + bool IsSimd128I2() const { return this->GetType() == TySimd128I2; } StackSym * GetFloat64EquivSym(Func *func); bool IsFloat64() const { return this->GetType() == TyFloat64; } diff --git a/lib/Backend/amd64/EncoderMD.cpp b/lib/Backend/amd64/EncoderMD.cpp index d1fbd3f7041..d0b88d16323 100644 --- a/lib/Backend/amd64/EncoderMD.cpp +++ b/lib/Backend/amd64/EncoderMD.cpp @@ -980,7 +980,10 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress) } break; } - + case Js::OpCode::PEXTRD: + case Js::OpCode::PEXTRQ: + this->EmitModRM(instr, opr1, this->GetRegEncode(opr2->AsRegOpnd())); + break; case Js::OpCode::BT: case Js::OpCode::BTR: case Js::OpCode::BTS: @@ -1163,6 +1166,7 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress) case Js::OpCode::MOVAPS: case Js::OpCode::MOVUPS: case Js::OpCode::MOVHPD: + case Js::OpCode::MOVLPD: if (!opr1->IsRegOpnd()) { Assert(opr1->IsIndirOpnd() || opr1->IsMemRefOpnd() || opr1->IsSymOpnd()); diff --git a/lib/Backend/amd64/LowererMDArch.cpp b/lib/Backend/amd64/LowererMDArch.cpp index d678aa87b3d..21e558d91fc 100644 --- a/lib/Backend/amd64/LowererMDArch.cpp +++ b/lib/Backend/amd64/LowererMDArch.cpp @@ -115,6 +115,7 @@ LowererMDArch::GetAssignOp(IRType type) case TySimd128B8: case TySimd128B16: case TySimd128D2: + case TySimd128I2: return Js::OpCode::MOVUPS; default: return Js::OpCode::MOV; @@ -1150,9 +1151,15 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal IRType type = src1->GetType(); IR::RegOpnd * indexOpnd = src1->AsIndirOpnd()->GetIndexOpnd(); const uint8 dataWidth = instr->dataWidth; - Assert(isSimdLoad == false || dataWidth == 4 || dataWidth == 8 || dataWidth == 12 || dataWidth == 16); +#if ENABLE_FAST_ARRAYBUFFER + if (CONFIG_FLAG(WasmFastArray) && 
m_func->GetJITFunctionBody()->IsWasmFunction()) + { + return instr; + } +#endif + #ifdef _WIN32 // For x64, bound checks are required only for SIMD loads. if (isSimdLoad) @@ -1190,7 +1197,23 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal } else { - lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); +#ifdef ENABLE_WASM_SIMD + if (m_func->GetJITFunctionBody()->IsWasmFunction() && src1->AsIndirOpnd()->GetOffset()) //WASM + { + IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func); + // MOV tmp, cmpOnd + Lowerer::InsertMove(tmp, cmpOpnd, helperLabel); + // ADD tmp, offset + Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func), helperLabel); + // JB helper + Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel); + lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); + } + else +#endif + { + lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); + } } Lowerer::InsertBranch(Js::OpCode::Br, loadLabel, helperLabel); @@ -1268,7 +1291,23 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa } else { - lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); +#ifdef ENABLE_WASM_SIMD + if (m_func->GetJITFunctionBody()->IsWasmFunction() && dst->AsIndirOpnd()->GetOffset()) //WASM + { + IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func); + // MOV tmp, cmpOnd + Lowerer::InsertMove(tmp, cmpOpnd, helperLabel); + // ADD tmp, offset + Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func), helperLabel); + // JB helper + Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, 
helperLabel); + lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); + } + else +#endif + { + lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); + } } Lowerer::InsertBranch(Js::OpCode::Br, storeLabel, helperLabel); @@ -1783,6 +1822,10 @@ LowererMDArch::LowerEntryInstr(IR::EntryInstr * entryInstr) this->MovArgFromReg2Stack(entryInstr, (RegNum)(RegXMM1 + i), offset, TySimd128D2); offset += 2; break; + case Js::AsmJsVarType::Int64x2: + this->MovArgFromReg2Stack(entryInstr, (RegNum)(RegXMM1 + i), offset, TySimd128I2); + offset += 2; + break; default: Assume(UNREACHED); } @@ -2082,6 +2125,9 @@ LowererMDArch::LowerExitInstr(IR::ExitInstr * exitInstr) case Js::AsmJsRetType::Float64x2: retReg = IR::RegOpnd::New(nullptr, this->GetRegReturnAsmJs(TySimd128D2), TySimd128D2, this->m_func); break; + case Js::AsmJsRetType::Int64x2: + retReg = IR::RegOpnd::New(nullptr, this->GetRegReturnAsmJs(TySimd128I2), TySimd128I2, this->m_func); + break; case Js::AsmJsRetType::Int64: case Js::AsmJsRetType::Signed: retReg = IR::RegOpnd::New(nullptr, this->GetRegReturn(TyMachReg), TyMachReg, this->m_func); diff --git a/lib/Backend/amd64/MdOpCodes.h b/lib/Backend/amd64/MdOpCodes.h index b0e95f2f9ce..ba39ce81e7c 100644 --- a/lib/Backend/amd64/MdOpCodes.h +++ b/lib/Backend/amd64/MdOpCodes.h @@ -132,6 +132,7 @@ MACRO(MOVD, Reg2, None, RNON, f(SPECIAL), o(MOVD), DDST|DNO1 MACRO(MOVHLPS, Reg2, None, RNON, f(SPECIAL), o(MOVHLPS), DDST|DNO16, OLB_0F) MACRO(MOVHPD, Reg2, None, RNON, f(SPECIAL), o(MOVHPD), DDST|DNO16|D66, OLB_0F) +MACRO(MOVLPD, Reg2, None, RNON, f(SPECIAL), o(MOVLPD), DDST|DNO16|D66, OLB_0F) MACRO(MOVLHPS, Reg2, None, RNON, f(SPECIAL), o(MOVLHPS), DDST|DNO16, OLB_0F) MACRO(MOVMSKPD, Reg2, None, RNON, f(SPECIAL), o(MOVMSKPD), DDST|DNO16|D66, OLB_0F) MACRO(MOVMSKPS, Reg2, None, RNON, f(SPECIAL), o(MOVMSKPS), DDST|DNO16, OLB_0F) @@ -168,6 +169,7 @@ 
MACRO(OR , Reg2, OpSideEffect, R001, f(BINOP), o(OR), DOPEQ|DSE MACRO(ORPS, Reg2, None, R001, f(MODRM), o(ORPS), DOPEQ|DOPEQ|DCOMMOP, OLB_0F) MACRO(PADDB, Reg2, None, RNON, f(MODRM), o(PADDB), DNO16|DOPEQ|D66|DCOMMOP, OLB_0F) MACRO(PADDD, Reg2, None, RNON, f(MODRM), o(PADDD), DNO16|DOPEQ|D66|DCOMMOP, OLB_0F) +MACRO(PADDQ, Reg2, None, RNON, f(MODRM), o(PADDQ), DNO16|DOPEQ|D66|DCOMMOP, OLB_0F) MACRO(PADDW, Reg2, None, RNON, f(MODRM), o(PADDW), DNO16|DOPEQ|D66|DCOMMOP, OLB_0F) MACRO(PADDSB, Reg2, None, RNON, f(MODRM), o(PADDSB), DNO16|DOPEQ|D66|DCOMMOP, OLB_0F) MACRO(PADDSW, Reg2, None, RNON, f(MODRM), o(PADDSW), DNO16|DOPEQ|D66|DCOMMOP, OLB_0F) @@ -192,18 +194,23 @@ MACRO(POR, Reg2, None, RNON, f(MODRM), o(POR), DNO16|DO MACRO(PSHUFD, Reg3, None, RNON, f(MODRM), o(PSHUFD), DDST|DNO16|D66|DSSE, OLB_0F) MACRO(PEXTRW, Reg3, None, RNON, f(MODRM), o(PEXTRW), DDST|DNO16|D66|DSSE, OLB_0F) +MACRO(PEXTRD, Reg3, None, RNON, f(SPECIAL), o(PEXTRD), DDST|DNO16|DSSE|D66, OLB_0F3A) +MACRO(PEXTRQ, Reg3, None, RNON, f(SPECIAL), o(PEXTRQ), DDST|DNO16|D66|DREXSRC|DSSE,OLB_0F3A) MACRO(PSLLDQ, Reg2, None, R111, f(SPECIAL), o(PSLLDQ), DDST|DNO16|DOPEQ|D66|DSSE, OLB_0F) MACRO(PSRLDQ, Reg2, None, R011, f(SPECIAL), o(PSRLDQ), DDST|DNO16|DOPEQ|D66|DSSE, OLB_0F) MACRO(PSLLW, Reg2, None, R110, f(SPECIAL), o(PSLLW), DNO16|DOPEQ|D66|DSSE, OLB_0F) MACRO(PSLLD, Reg2, None, R110, f(SPECIAL), o(PSLLD), DNO16|DOPEQ|D66|DSSE, OLB_0F) +MACRO(PSLLQ, Reg2, None, RNON, f(MODRM), o(PSLLQ), DNO16|DOPEQ|D66|DSSE, OLB_0F) MACRO(PSRAW, Reg2, None, R100, f(SPECIAL), o(PSRAW), DNO16|DOPEQ|D66|DSSE, OLB_0F) MACRO(PSRAD, Reg2, None, R100, f(SPECIAL), o(PSRAD), DNO16|DOPEQ|D66|DSSE, OLB_0F) MACRO(PSRLW, Reg2, None, R010, f(SPECIAL), o(PSRLW), DNO16|DOPEQ|D66|DSSE, OLB_0F) MACRO(PSRLD, Reg2, None, R010, f(SPECIAL), o(PSRLD), DNO16|DOPEQ|D66|DSSE, OLB_0F) +MACRO(PSRLQ, Reg2, None, RNON, f(MODRM), o(PSRLQ), DNO16|DOPEQ|D66|DSSE, OLB_0F) MACRO(PSUBB, Reg2, None, RNON, f(MODRM), o(PSUBB), DNO16|DOPEQ|D66, OLB_0F) 
MACRO(PSUBD, Reg2, None, RNON, f(MODRM), o(PSUBD), DNO16|DOPEQ|D66, OLB_0F) +MACRO(PSUBQ, Reg2, None, RNON, f(MODRM), o(PSUBQ), DNO16|DOPEQ|D66, OLB_0F) MACRO(PSUBW, Reg2, None, RNON, f(MODRM), o(PSUBW), DNO16|DOPEQ|D66, OLB_0F) MACRO(PSUBSB, Reg2, None, RNON, f(MODRM), o(PSUBSB), DNO16|DOPEQ|D66, OLB_0F) MACRO(PSUBSW, Reg2, None, RNON, f(MODRM), o(PSUBSW), DNO16|DOPEQ|D66, OLB_0F) @@ -260,6 +267,8 @@ MACRO(XCHG, Reg2, None, R000, f(XCHG), o(XCHG), DOPEQ, MACRO(XOR, Reg2, OpSideEffect, R110, f(BINOP), o(XOR), DOPEQ|DSETCC|DCOMMOP, OLB_NONE) MACRO(XORPS, Reg3, None, RNON, f(MODRM), o(XORPS), DNO16|DOPEQ|DCOMMOP, OLB_0F) MACRO(PINSRW, Reg2, None, RNON, f(MODRM), o(PINSRW), DDST|DNO16|DSSE|D66, OLB_0F) +MACRO(PINSRD, Reg3, None, RNON, f(MODRM), o(PINSRD), DDST|DNO16|DSSE|D66, OLB_0F3A) +MACRO(PINSRQ, Reg3, None, RNON, f(MODRM), o(PINSRQ), DDST|DNO16|D66|DREXSRC|DSSE,OLB_0F3A) MACRO(POPCNT, Reg2, None, RNON, f(MODRM), o(POPCNT), DF3|DSETCC|DDST, OLB_0F) #undef o diff --git a/lib/Backend/amd64/X64Encode.h b/lib/Backend/amd64/X64Encode.h index 0d1f84002f9..2986ad81f83 100644 --- a/lib/Backend/amd64/X64Encode.h +++ b/lib/Backend/amd64/X64Encode.h @@ -233,6 +233,7 @@ enum Forms : BYTE #define OPBYTE_MOVHLPS {0x12} // modrm #define OPBYTE_MOVHPD {0x16} // special +#define OPBYTE_MOVLPD {0x12} // special #define OPBYTE_MOVLHPS {0x16} // modrm #define OPBYTE_MOVMSKPD {0x50} // modrm #define OPBYTE_MOVMSKPS {0x50} // modrm @@ -264,6 +265,7 @@ enum Forms : BYTE #define OPBYTE_ORPS {0x56} // modrm #define OPBYTE_PADDB {0xfc} // modrm #define OPBYTE_PADDD {0xfe} // modrm +#define OPBYTE_PADDQ {0xd4} // modrm #define OPBYTE_PADDW {0xfd} // modrm #define OPBYTE_PADDSB {0xec} // modrm #define OPBYTE_PADDSW {0xed} // modrm @@ -292,19 +294,26 @@ enum Forms : BYTE #define OPBYTE_POPCNT {0xB8} // modrm #define OPBYTE_PSHUFD {0x70} // special #define OPBYTE_PEXTRW {0xc5} // special +#define OPBYTE_PEXTRD {0x16} // special +#define OPBYTE_PEXTRQ {0x16} // special #define OPBYTE_PINSRW 
{0xc4} // special +#define OPBYTE_PINSRD {0x22} // special +#define OPBYTE_PINSRQ {0x22} // special #define OPBYTE_PSLLDQ {0x73} // mmxshift #define OPBYTE_PSRLDQ {0x73} // mmxshift #define OPBYTE_PSLLW {0x71} // mmx lane shift #define OPBYTE_PSLLD {0x72} // mmx lane shift +#define OPBYTE_PSLLQ {0xf3} // mmx lane shift #define OPBYTE_PSRAW {0x71} // mmx lane shift #define OPBYTE_PSRAD {0x72} // mmx lane shift #define OPBYTE_PSRLW {0x71} // mmx lane shift #define OPBYTE_PSRLD {0x72} // mmx lane shift +#define OPBYTE_PSRLQ {0xd3} // mmx lane shift #define OPBYTE_PSUBB {0xf8} // modrm #define OPBYTE_PSUBD {0xfa} // modrm +#define OPBYTE_PSUBQ {0xfb} // modrm #define OPBYTE_PSUBW {0xf9} // modrm #define OPBYTE_PSUBSB {0xe8} // modrm #define OPBYTE_PSUBSW {0xe9} // modrm diff --git a/lib/Backend/amd64/md.h b/lib/Backend/amd64/md.h index a482ace41b3..ff865ec0b6b 100644 --- a/lib/Backend/amd64/md.h +++ b/lib/Backend/amd64/md.h @@ -23,6 +23,7 @@ const IRType TyMachSimd128B4 = TySimd128B4; const IRType TyMachSimd128B8 = TySimd128B8; const IRType TyMachSimd128B16 = TySimd128B16; const IRType TyMachSimd128D2 = TySimd128D2; +const IRType TyMachSimd128I2 = TySimd128I2; const DWORD EMIT_BUFFER_ALIGNMENT = 16; const DWORD INSTR_ALIGNMENT = 1; diff --git a/lib/Backend/i386/EncoderMD.cpp b/lib/Backend/i386/EncoderMD.cpp index e4f7a4d5283..22dd76f7d25 100644 --- a/lib/Backend/i386/EncoderMD.cpp +++ b/lib/Backend/i386/EncoderMD.cpp @@ -1070,6 +1070,7 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress) case Js::OpCode::MOVAPS: case Js::OpCode::MOVUPS: case Js::OpCode::MOVHPD: + case Js::OpCode::MOVLPD: if (!opr1->IsRegOpnd()) { Assert(opr2->IsRegOpnd()); @@ -1137,6 +1138,9 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress) continue; } break; + case Js::OpCode::PEXTRD: + this->EmitModRM(instr, opr1, this->GetRegEncode(opr2->AsRegOpnd())); + break; case Js::OpCode::BT: case Js::OpCode::BTR: /* diff --git a/lib/Backend/i386/LowererMDArch.cpp 
b/lib/Backend/i386/LowererMDArch.cpp index 9f53a10b017..e36b7e8f125 100644 --- a/lib/Backend/i386/LowererMDArch.cpp +++ b/lib/Backend/i386/LowererMDArch.cpp @@ -109,6 +109,7 @@ LowererMDArch::GetAssignOp(IRType type) case TySimd128B8: case TySimd128B16: case TySimd128D2: + case TySimd128I2: return Js::OpCode::MOVUPS; default: return Js::OpCode::MOV; @@ -964,7 +965,22 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal } else { - lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); +#ifdef ENABLE_WASM_SIMD + if (m_func->GetJITFunctionBody()->IsWasmFunction() && src1->AsIndirOpnd()->GetOffset()) //WASM.SIMD + { + IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func); + Lowerer::InsertMove(tmp, cmpOpnd, helperLabel); + // ADD tmp, offset + Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func), helperLabel); + // JB helper + Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel); + lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); + } + else +#endif + { + lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); + } } Lowerer::InsertBranch(Js::OpCode::Br, loadLabel, helperLabel); @@ -1026,7 +1042,22 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa } else { - lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); +#ifdef ENABLE_WASM_SIMD + if (m_func->GetJITFunctionBody()->IsWasmFunction() && dst->AsIndirOpnd()->GetOffset()) //WASM.SIMD + { + IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func); + Lowerer::InsertMove(tmp, cmpOpnd, helperLabel); + // ADD tmp, offset + Lowerer::InsertAdd(true, tmp, tmp, 
IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func), helperLabel); + // JB helper + Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel); + lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); + } + else +#endif + { + lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel); + } } if (isSimdStore) @@ -1806,6 +1837,7 @@ LowererMDArch::ChangeToAssignInt64(IR::Instr * instr) } + void LowererMDArch::EmitInt64Instr(IR::Instr *instr) { diff --git a/lib/Backend/i386/MdOpCodes.h b/lib/Backend/i386/MdOpCodes.h index 802f8720416..fb293a9c25e 100644 --- a/lib/Backend/i386/MdOpCodes.h +++ b/lib/Backend/i386/MdOpCodes.h @@ -129,6 +129,7 @@ MACRO(MOVD, Reg2, None, RNON, f(SPECIAL), o(MOVD), DDST MACRO(MOVHLPS, Reg2, None, RNON, f(SPECIAL), o(MOVHLPS), DDST|DNO16|DZEROF, OLB_NONE) MACRO(MOVHPD, Reg2, None, RNON, f(SPECIAL), o(MOVHPD), DDST|DNO16|D66, OLB_NONE) +MACRO(MOVLPD, Reg2, None, RNON, f(SPECIAL), o(MOVLPD), DDST|DNO16|D66, OLB_NONE) MACRO(MOVLHPS, Reg2, None, RNON, f(SPECIAL), o(MOVLHPS), DDST|DNO16|DZEROF, OLB_NONE) MACRO(MOVMSKPD, Reg2, None, RNON, f(SPECIAL), o(MOVMSKPD), DDST|DNO16|D66, OLB_NONE) MACRO(MOVMSKPS, Reg2, None, RNON, f(SPECIAL), o(MOVMSKPS), DDST|DNO16|DZEROF, OLB_NONE) @@ -157,6 +158,7 @@ MACRO(OR , Reg2, None, R001, f(BINOP), o(OR), DOPE MACRO(ORPS, Reg2, None, R001, f(MODRM), o(ORPS), DOPEQ|DOPEQ|DZEROF|DCOMMOP, OLB_NONE) MACRO(PADDB, Reg2, None, RNON, f(MODRM), o(PADDB), DNO16|DOPEQ|D66|DCOMMOP, OLB_NONE) MACRO(PADDD, Reg2, None, RNON, f(MODRM), o(PADDD), DNO16|DOPEQ|D66|DCOMMOP, OLB_NONE) +MACRO(PADDQ, Reg2, None, RNON, f(MODRM), o(PADDQ), DNO16|DOPEQ|D66|DCOMMOP, OLB_NONE) MACRO(PADDW, Reg2, None, RNON, f(MODRM), o(PADDW), DNO16|DOPEQ|D66|DCOMMOP, OLB_NONE) MACRO(PADDSB, Reg2, None, RNON, f(MODRM), o(PADDSB), DNO16|DOPEQ|D66|DCOMMOP, OLB_NONE) MACRO(PADDSW, Reg2, None, 
RNON, f(MODRM), o(PADDSW), DNO16|DOPEQ|D66|DCOMMOP, OLB_NONE) @@ -182,18 +184,23 @@ MACRO(POP, Reg1, OpSideEffect, R000, f(PSHPOP), o(POP), DDST MACRO(POR, Reg2, None, RNON, f(MODRM), o(POR), DNO16|DOPEQ|D66|DCOMMOP, OLB_NONE) MACRO(PSHUFD, Reg3, None, RNON, f(MODRM), o(PSHUFD), DDST|DNO16|D66|DSSE, OLB_NONE) MACRO(PEXTRW, Reg3, None, RNON, f(MODRM), o(PEXTRW), DDST|DNO16|D66|DSSE, OLB_NONE) +MACRO(PEXTRD, Reg3, None, RNON, f(SPECIAL), o(PEXTRD), DDST|DNO16|DSSE|D66, OLB_0F3A) MACRO(PINSRW, Reg3, None, RNON, f(MODRM), o(PINSRW), DDST|DNO16|D66|DSSE, OLB_NONE) +MACRO(PINSRD, Reg3, None, RNON, f(MODRM), o(PINSRD), DDST|DNO16|D66|DSSE, OLB_0F3A) MACRO(PSLLDQ, Reg2, None, R111, f(SPECIAL), o(PSLLDQ), DDST|DNO16|DOPEQ|D66|DSSE, OLB_NONE) MACRO(PSRLDQ, Reg2, None, R011, f(SPECIAL), o(PSRLDQ), DDST|DNO16|DOPEQ|D66|DSSE, OLB_NONE) MACRO(PSRLW, Reg2, None, R010, f(SPECIAL), o(PSRLW), DNO16|DOPEQ|D66|DSSE, OLB_NONE) MACRO(PSRLD, Reg2, None, R010, f(SPECIAL), o(PSRLD), DNO16|DOPEQ|D66|DSSE, OLB_NONE) +MACRO(PSRLQ, Reg2, None, RNON, f(MODRM), o(PSRLQ), DNO16|DOPEQ|D66|DSSE, OLB_NONE) MACRO(PSRAW, Reg2, None, R100, f(SPECIAL), o(PSRAW), DNO16|DOPEQ|D66|DSSE, OLB_NONE) MACRO(PSRAD, Reg2, None, R100, f(SPECIAL), o(PSRAD), DNO16|DOPEQ|D66|DSSE, OLB_NONE) MACRO(PSLLW, Reg2, None, R110, f(SPECIAL), o(PSLLW), DNO16|DOPEQ|D66|DSSE, OLB_NONE) MACRO(PSLLD, Reg2, None, R110, f(SPECIAL), o(PSLLD), DNO16|DOPEQ|D66|DSSE, OLB_NONE) +MACRO(PSLLQ, Reg2, None, RNON, f(MODRM), o(PSLLQ), DNO16|DOPEQ|D66|DSSE, OLB_NONE) MACRO(PSUBB, Reg2, None, RNON, f(MODRM), o(PSUBB), DNO16|DOPEQ|D66, OLB_NONE) MACRO(PSUBD, Reg2, None, RNON, f(MODRM), o(PSUBD), DNO16|DOPEQ|D66, OLB_NONE) +MACRO(PSUBQ, Reg2, None, RNON, f(MODRM), o(PSUBQ), DNO16|DOPEQ|D66, OLB_NONE) MACRO(PSUBW, Reg2, None, RNON, f(MODRM), o(PSUBW), DNO16|DOPEQ|D66, OLB_NONE) MACRO(PSUBSB, Reg2, None, RNON, f(MODRM), o(PSUBSB), DNO16|DOPEQ|D66, OLB_NONE) MACRO(PSUBSW, Reg2, None, RNON, f(MODRM), o(PSUBSW), DNO16|DOPEQ|D66, OLB_NONE) diff --git 
a/lib/Backend/i386/X86Encode.h b/lib/Backend/i386/X86Encode.h index ad90914e685..da8adf4a04a 100644 --- a/lib/Backend/i386/X86Encode.h +++ b/lib/Backend/i386/X86Encode.h @@ -218,6 +218,7 @@ enum Forms : BYTE #define OPBYTE_MOVHLPS {0x12} // modrm #define OPBYTE_MOVHPD {0x16} // special +#define OPBYTE_MOVLPD {0x12} // special #define OPBYTE_MOVLHPS {0x16} // modrm #define OPBYTE_MOVMSKPD {0x50} // modrm #define OPBYTE_MOVMSKPS {0x50} // modrm @@ -245,6 +246,7 @@ enum Forms : BYTE #define OPBYTE_ORPS {0x56} // modrm #define OPBYTE_PADDB {0xfc} // modrm #define OPBYTE_PADDD {0xfe} // modrm +#define OPBYTE_PADDQ {0xd4} // modrm #define OPBYTE_PADDW {0xfd} // modrm #define OPBYTE_PADDSB {0xec} // modrm #define OPBYTE_PADDSW {0xed} // modrm @@ -273,19 +275,24 @@ enum Forms : BYTE #define OPBYTE_POPCNT {0xB8} // modrm #define OPBYTE_PSHUFD {0x70} // special #define OPBYTE_PEXTRW {0xc5} // special +#define OPBYTE_PEXTRD {0x16} // special #define OPBYTE_PINSRW {0xc4} // special +#define OPBYTE_PINSRD {0x22} // special #define OPBYTE_PSLLDQ {0x73} // mmxshift #define OPBYTE_PSRLDQ {0x73} // mmxshift #define OPBYTE_PSLLW {0x71} // mmx lane shift #define OPBYTE_PSLLD {0x72} // mmx lane shift +#define OPBYTE_PSLLQ {0xf3} // mmx lane shift #define OPBYTE_PSRAW {0x71} // mmx lane shift #define OPBYTE_PSRAD {0x72} // mmx lane shift #define OPBYTE_PSRLW {0x71} // mmx lane shift #define OPBYTE_PSRLD {0x72} // mmx lane shift +#define OPBYTE_PSRLQ {0xd3} // mmx lane shift #define OPBYTE_PSUBB {0xf8} // modrm #define OPBYTE_PSUBD {0xfa} // modrm +#define OPBYTE_PSUBQ {0xfb} // modrm #define OPBYTE_PSUBW {0xf9} // modrm #define OPBYTE_PSUBSB {0xe8} // modrm #define OPBYTE_PSUBSW {0xe9} // modrm diff --git a/lib/Backend/i386/md.h b/lib/Backend/i386/md.h index 2a862f3ecfe..dee0d225f8a 100644 --- a/lib/Backend/i386/md.h +++ b/lib/Backend/i386/md.h @@ -21,6 +21,7 @@ const IRType TyMachSimd128B4 = TySimd128B4; const IRType TyMachSimd128B8 = TySimd128B8; const IRType TyMachSimd128B16 = 
TySimd128B16; const IRType TyMachSimd128D2 = TySimd128D2; +const IRType TyMachSimd128I2 = TySimd128I2; const DWORD EMIT_BUFFER_ALIGNMENT = 16; const DWORD INSTR_ALIGNMENT = 1; diff --git a/lib/Common/CommonDefines.h b/lib/Common/CommonDefines.h index 339818d0d30..a12760157a8 100644 --- a/lib/Common/CommonDefines.h +++ b/lib/Common/CommonDefines.h @@ -710,6 +710,7 @@ // xplat-todo: once all the wasm tests are passing on xplat, enable it for release builds #if defined(_WIN32) || (defined(__clang__) && defined(ENABLE_DEBUG_CONFIG_OPTIONS)) #define ENABLE_WASM +#define ENABLE_WASM_SIMD #ifdef CAN_BUILD_WABT #define ENABLE_WABT diff --git a/lib/Common/ConfigFlagsList.h b/lib/Common/ConfigFlagsList.h index 0018a75c601..cfdbaf64f24 100644 --- a/lib/Common/ConfigFlagsList.h +++ b/lib/Common/ConfigFlagsList.h @@ -411,6 +411,7 @@ PHASE(All) #define DEFAULT_CONFIG_WasmMathExFilter (false) #define DEFAULT_CONFIG_WasmIgnoreResponse (false) #define DEFAULT_CONFIG_WasmMaxTableSize (10000000) +#define DEFAULT_CONFIG_WasmSimd (false) #define DEFAULT_CONFIG_WasmSignExtends (false) #define DEFAULT_CONFIG_BgJitDelayFgBuffer (0) #define DEFAULT_CONFIG_BgJitPendingFuncCap (31) @@ -895,6 +896,9 @@ FLAGNR(Boolean, WasmFold , "Enable i32/i64 const folding", DEFAULT_ FLAGNR(Boolean, WasmIgnoreResponse , "Ignore the type of the Response object", DEFAULT_CONFIG_WasmIgnoreResponse) FLAGNR(Number, WasmMaxTableSize , "Maximum size allowed to the WebAssembly.Table", DEFAULT_CONFIG_WasmMaxTableSize) FLAGNR(Boolean, WasmSignExtends , "Use new WebAssembly sign extension operators", DEFAULT_CONFIG_WasmSignExtends) +#ifdef ENABLE_WASM_SIMD +FLAGNR(Boolean, WasmSimd , "Enable SIMD in WebAssembly", DEFAULT_CONFIG_WasmSimd) +#endif #ifdef ENABLE_SIMDJS #ifndef COMPILE_DISABLE_Simdjs diff --git a/lib/Runtime/Base/ThreadContext.cpp b/lib/Runtime/Base/ThreadContext.cpp index 04c7775b093..efb57c85efa 100644 --- a/lib/Runtime/Base/ThreadContext.cpp +++ b/lib/Runtime/Base/ThreadContext.cpp @@ -336,7 +336,7 @@ 
ThreadContext::GetThreadStackLimitAddr() const return (intptr_t)GetAddressOfStackLimitForCurrentThread(); } -#if ENABLE_NATIVE_CODEGEN && defined(ENABLE_SIMDJS) && (defined(_M_IX86) || defined(_M_X64)) +#if ENABLE_NATIVE_CODEGEN && (defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD)) && (defined(_M_IX86) || defined(_M_X64)) intptr_t ThreadContext::GetSimdTempAreaAddr(uint8 tempIndex) const { @@ -2010,7 +2010,7 @@ ThreadContext::EnsureJITThreadContext(bool allowPrereserveAlloc) contextData.scriptStackLimit = GetScriptStackLimit(); contextData.isThreadBound = IsThreadBound(); contextData.allowPrereserveAlloc = allowPrereserveAlloc; -#if defined(ENABLE_SIMDJS) && (_M_IX86 || _M_AMD64) +#if (defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD)) && (_M_IX86 || _M_AMD64) contextData.simdTempAreaBaseAddr = (intptr_t)GetSimdTempArea(); #endif diff --git a/lib/Runtime/Base/ThreadContext.h b/lib/Runtime/Base/ThreadContext.h index 12cdd19e5e6..668b141efa8 100644 --- a/lib/Runtime/Base/ThreadContext.h +++ b/lib/Runtime/Base/ThreadContext.h @@ -412,7 +412,9 @@ class ThreadContext sealed : void AddSimdFuncInfo(Js::OpCode op, Js::FunctionInfo *funcInfo); Js::OpCode GetSimdOpcodeFromFuncInfo(Js::FunctionInfo * funcInfo); void GetSimdFuncSignatureFromOpcode(Js::OpCode op, SimdFuncSignature &funcSignature); +#endif +#if defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) #if _M_IX86 || _M_AMD64 // auxiliary SIMD values in memory to help JIT'ed code. E.g. used for Int8x16 shuffle. 
_x86_SIMDValue X86_TEMP_SIMD[SIMD_TEMP_SIZE]; @@ -1324,7 +1326,7 @@ class ThreadContext sealed : virtual intptr_t GetThreadStackLimitAddr() const override; -#if ENABLE_NATIVE_CODEGEN && defined(ENABLE_SIMDJS) && (defined(_M_IX86) || defined(_M_X64)) +#if ENABLE_NATIVE_CODEGEN && (defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD)) && (defined(_M_IX86) || defined(_M_X64)) virtual intptr_t GetSimdTempAreaAddr(uint8 tempIndex) const override; #endif diff --git a/lib/Runtime/Base/ThreadContextInfo.h b/lib/Runtime/Base/ThreadContextInfo.h index d5c6182d3aa..2412491090b 100644 --- a/lib/Runtime/Base/ThreadContextInfo.h +++ b/lib/Runtime/Base/ThreadContextInfo.h @@ -103,7 +103,7 @@ class ThreadContextInfo virtual ptrdiff_t GetCRTBaseAddressDifference() const = 0; #if ENABLE_NATIVE_CODEGEN -#if defined(ENABLE_SIMDJS) && (defined(_M_IX86) || defined(_M_X64)) +#if (defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD)) && (defined(_M_IX86) || defined(_M_X64)) virtual intptr_t GetSimdTempAreaAddr(uint8 tempIndex) const = 0; #endif virtual intptr_t GetBailOutRegisterSaveSpaceAddr() const = 0; diff --git a/lib/Runtime/ByteCode/AsmJsByteCodeDumper.cpp b/lib/Runtime/ByteCode/AsmJsByteCodeDumper.cpp index 37b9f236cd5..919705a1bed 100644 --- a/lib/Runtime/ByteCode/AsmJsByteCodeDumper.cpp +++ b/lib/Runtime/ByteCode/AsmJsByteCodeDumper.cpp @@ -61,6 +61,9 @@ namespace Js case AsmJsType::Float64x2: Output::Print(_u("D2(In%hu)"), i); break; + case AsmJsType::Int64x2: + Output::Print(_u("I2(In%hu)"), i); + break; } } else @@ -369,6 +372,12 @@ namespace Js Output::Print(_u(" D2_%d "), (int)reg); } + // Int64x2 + void AsmJsByteCodeDumper::DumpInt64x2Reg(RegSlot reg) + { + Output::Print(_u(" I2_%d "), (int)reg); + } + template void AsmJsByteCodeDumper::DumpElementSlot(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader) { @@ -979,6 +988,19 @@ namespace Js DumpI4(data->C1); } + template + void AsmJsByteCodeDumper::DumpAsmShuffle(OpCodeAsmJs op, const 
unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader) + { + DumpFloat32x4Reg(data->R0); + DumpFloat32x4Reg(data->R1); + DumpFloat32x4Reg(data->R2); + const uint NUM_LANES = 16; + for (uint i = 0; i < NUM_LANES; i++) + { + DumpU4(data->INDICES[i]); + } + } + template void AsmJsByteCodeDumper::DumpAsmSimdTypedArr(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader) { @@ -1024,6 +1046,7 @@ namespace Js #define SIMD_DUMP_ARR_U16 DumpUint8x16Reg #define SIMD_DUMP_ARR_F4 DumpFloat32x4Reg #define SIMD_DUMP_ARR_D2 DumpFloat64x2Reg +#define SIMD_DUMP_ARR_I2 DumpInt64x2Reg #define SIMD_DUMP_REG(type) SIMD_DUMP_ARR_##type(data->Value) #define SIMD_DUMP_ARR_VALUE(type) \ case OpCodeAsmJs::Simd128_LdArr_##type:\ diff --git a/lib/Runtime/ByteCode/AsmJsByteCodeDumper.h b/lib/Runtime/ByteCode/AsmJsByteCodeDumper.h index b2c011ebfce..1b695690b23 100644 --- a/lib/Runtime/ByteCode/AsmJsByteCodeDumper.h +++ b/lib/Runtime/ByteCode/AsmJsByteCodeDumper.h @@ -34,6 +34,7 @@ namespace Js { static void DumpBool8x16Reg(RegSlot reg); static void DumpFloat64x2Reg(RegSlot reg); + static void DumpInt64x2Reg(RegSlot reg); static void DumpRegReg(RegSlot reg) { DumpReg(reg); } static void DumpIntConstReg(int val) { DumpI4(val); } diff --git a/lib/Runtime/ByteCode/AsmJsByteCodeWriter.cpp b/lib/Runtime/ByteCode/AsmJsByteCodeWriter.cpp index e55d57dc260..e04455938b3 100644 --- a/lib/Runtime/ByteCode/AsmJsByteCodeWriter.cpp +++ b/lib/Runtime/ByteCode/AsmJsByteCodeWriter.cpp @@ -4,6 +4,7 @@ //------------------------------------------------------------------------------------------------------- #include "RuntimeByteCodePch.h" +#include "../../WasmReader/WasmParseTree.h" #if defined(ASMJS_PLAT) || defined(ENABLE_WASM) @@ -83,6 +84,23 @@ namespace Js } return false; } + + template + bool AsmJsByteCodeWriter::TryWriteFloat32x4_IntConst4(OpCodeAsmJs op, RegSlot R0, int C1, int C2, int C3, int C4) + { + OpLayoutT_Float32x4_IntConst4 layout; + if 
(SizePolicy::Assign(layout.F4_0, R0) && + SizePolicy::Assign(layout.C1, C1) && + SizePolicy::Assign(layout.C2, C2) && + SizePolicy::Assign(layout.C3, C3) && + SizePolicy::Assign(layout.C4, C4) + ) + { + m_byteCodeData.EncodeT(op, &layout, sizeof(layout), this); + return true; + } + return false; + } template bool AsmJsByteCodeWriter::TryWriteAsmReg2(OpCodeAsmJs op, RegSlot R0, RegSlot R1) { @@ -233,6 +251,19 @@ namespace Js return false; } + template + bool AsmJsByteCodeWriter::TryWriteAsmShuffle(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, uint8 indices[]) + { + OpLayoutT_AsmShuffle layout; + if (SizePolicy::Assign(layout.R0, R0) && SizePolicy::Assign(layout.R1, R1) && SizePolicy::Assign(layout.R2, R2)) + { + memcpy_s(layout.INDICES, Wasm::Simd::MAX_LANES, indices, Wasm::Simd::MAX_LANES); + m_byteCodeData.EncodeT(op, &layout, sizeof(layout), this); + return true; + } + return false; + } + template bool AsmJsByteCodeWriter::TryWriteInt1Const1(OpCodeAsmJs op, RegSlot R0, int C1) { @@ -392,11 +423,11 @@ namespace Js } template - bool AsmJsByteCodeWriter::TryWriteAsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType) + bool AsmJsByteCodeWriter::TryWriteAsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset) { OpLayoutT_AsmSimdTypedArr layout; if (SizePolicy::Assign(layout.Value, value) && SizePolicy::template Assign(layout.ViewType, viewType) - && SizePolicy::Assign(layout.SlotIndex, slotIndex) && SizePolicy::template Assign(layout.DataWidth, dataWidth)) + && SizePolicy::Assign(layout.SlotIndex, slotIndex) && SizePolicy::template Assign(layout.DataWidth, dataWidth) && SizePolicy::Assign(layout.Offset, offset)) { m_byteCodeData.EncodeT(op, &layout, sizeof(layout), this); return true; @@ -500,12 +531,22 @@ namespace Js MULTISIZE_LAYOUT_WRITE(AsmReg18, op, R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, 
R16, R17); } + void AsmJsByteCodeWriter::AsmShuffle(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, uint8 indices[]) + { + MULTISIZE_LAYOUT_WRITE(AsmShuffle, op, R0, R1, R2, indices); + } + void AsmJsByteCodeWriter::AsmReg19(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16, RegSlot R17, RegSlot R18) { MULTISIZE_LAYOUT_WRITE(AsmReg19, op, R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18); } + void AsmJsByteCodeWriter::WasmSimdConst(OpCodeAsmJs op, RegSlot R0, int C0, int C1, int C2, int C3) + { + MULTISIZE_LAYOUT_WRITE(Float32x4_IntConst4, op, R0, C0, C1, C2, C3); + } + void AsmJsByteCodeWriter::AsmBr(ByteCodeLabel labelID, OpCodeAsmJs op) { CheckOpen(); @@ -575,10 +616,10 @@ namespace Js MULTISIZE_LAYOUT_WRITE(WasmMemAccess, op, value, slotIndex, offset, viewType); } - void AsmJsByteCodeWriter::AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType) + void AsmJsByteCodeWriter::AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset) { Assert(dataWidth >= 4 && dataWidth <= 16); - MULTISIZE_LAYOUT_WRITE(AsmSimdTypedArr, op, value, slotIndex, dataWidth, viewType); + MULTISIZE_LAYOUT_WRITE(AsmSimdTypedArr, op, value, slotIndex, dataWidth, viewType, offset); } void AsmJsByteCodeWriter::AsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, uint32 slotId) diff --git a/lib/Runtime/ByteCode/AsmJsByteCodeWriter.h b/lib/Runtime/ByteCode/AsmJsByteCodeWriter.h index 9e258758966..9febcb08185 100644 --- a/lib/Runtime/ByteCode/AsmJsByteCodeWriter.h +++ b/lib/Runtime/ByteCode/AsmJsByteCodeWriter.h @@ -45,31 +45,34 @@ namespace Js IMP_IWASM void MarkAsmJsLabel(ByteCodeLabel labelID); IMP_IWASM void ExitLoop(uint loopId); + IMP_IWASM 
void AsmReg4(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3); + IMP_IWASM void AsmReg5(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4); + IMP_IWASM void AsmReg9(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8); + IMP_IWASM void AsmReg17(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, + RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16); + IMP_IWASM void AsmShuffle(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, uint8 indices[]); + IMP_IWASM void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0); + IMP_IWASM void WasmSimdConst(OpCodeAsmJs op, RegSlot R0, int C0, int C1, int C2, int C3); #ifdef WASM_BYTECODE_WRITER // We don't want to expose api not in IWasmByteCodeWriter, but it's easier to compile them anyway private: #endif uint EnterLoop(ByteCodeLabel loopEntrance); - void AsmReg4(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3); - void AsmReg5(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4); void AsmReg6(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5); void AsmReg7(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6); - void AsmReg9(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8); void AsmReg10(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, RegSlot R9); void AsmReg11(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, RegSlot R9, RegSlot R10); - void AsmReg17(OpCodeAsmJs op, RegSlot 
R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, - RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16); void AsmReg18(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16, RegSlot R17); void AsmReg19(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, - RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16, RegSlot R17, RegSlot R18); + RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16, RegSlot R17, RegSlot R18); void AsmBrReg2(OpCodeAsmJs op, ByteCodeLabel labelID, RegSlot R1, RegSlot R2); void AsmTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, ArrayBufferView::ViewType viewType); - void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType); private: void AsmJsUnsigned1(OpCodeAsmJs op, uint C1); template bool TryWriteAsmReg1(OpCodeAsmJs op, RegSlot R0); + template bool TryWriteFloat32x4_IntConst4(OpCodeAsmJs op, RegSlot R0, int C1, int C2, int C3, int C4); template bool TryWriteAsmReg2(OpCodeAsmJs op, RegSlot R0, RegSlot R1); template bool TryWriteAsmReg3(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2); template bool TryWriteAsmReg4(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3); @@ -87,6 +90,8 @@ namespace Js template bool TryWriteAsmReg19(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16, RegSlot R17, RegSlot R18); + template bool 
TryWriteAsmShuffle(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, uint8 indices[]); + template bool TryWriteInt1Const1(OpCodeAsmJs op, RegSlot R0, int C1); template bool TryWriteReg1IntConst1(OpCodeAsmJs op, RegSlot R0, int C1); template bool TryWriteLong1Const1(OpCodeAsmJs op, RegSlot R0, int64 C1); @@ -99,7 +104,7 @@ namespace Js template bool TryWriteAsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, uint32 slotId); template bool TryWriteWasmMemAccess(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint32 offset, ArrayBufferView::ViewType viewType); template bool TryWriteAsmTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, ArrayBufferView::ViewType viewType); - template bool TryWriteAsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType); + template bool TryWriteAsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset); template bool TryWriteAsmJsUnsigned1(OpCodeAsmJs op, uint C1); void AddJumpOffset(Js::OpCodeAsmJs op, ByteCodeLabel labelId, uint fieldByteOffset); diff --git a/lib/Runtime/ByteCode/IWasmByteCodeWriter.h b/lib/Runtime/ByteCode/IWasmByteCodeWriter.h index 711adf70327..d18eeabc05a 100644 --- a/lib/Runtime/ByteCode/IWasmByteCodeWriter.h +++ b/lib/Runtime/ByteCode/IWasmByteCodeWriter.h @@ -30,6 +30,15 @@ namespace Js virtual void AsmReg1(OpCodeAsmJs op, RegSlot R0) = 0; virtual void AsmReg2(OpCodeAsmJs op, RegSlot R0, RegSlot R1) = 0; virtual void AsmReg3(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2) = 0; + virtual void AsmReg4(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3) = 0; + virtual void AsmReg5(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4) = 0; + virtual void AsmReg9(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8) = 0; + virtual void AsmReg17(OpCodeAsmJs op, 
RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, + RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16) = 0; + virtual void AsmShuffle(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, uint8 indices[]) = 0; + virtual void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0) = 0; + virtual void WasmSimdConst(OpCodeAsmJs op, RegSlot R0, int C0, int C1, int C2, int C3) = 0; + virtual void AsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, uint32 slotId) = 0; virtual void AsmBr(ByteCodeLabel labelID, OpCodeAsmJs op = OpCodeAsmJs::AsmBr) = 0; virtual void AsmBrReg1(OpCodeAsmJs op, ByteCodeLabel labelID, RegSlot R1) = 0; diff --git a/lib/Runtime/ByteCode/LayoutTypesAsmJs.h b/lib/Runtime/ByteCode/LayoutTypesAsmJs.h index 8e6549e7b3a..89f0edf46a0 100644 --- a/lib/Runtime/ByteCode/LayoutTypesAsmJs.h +++ b/lib/Runtime/ByteCode/LayoutTypesAsmJs.h @@ -113,7 +113,8 @@ #define LAYOUT_PREFIX_Float32x4() F4_ #define LAYOUT_PREFIX_Bool32x4() B4_ #define LAYOUT_PREFIX_Int32x4() I4_ -#define LAYOUT_PREFIX_Float64x2() F2_ +#define LAYOUT_PREFIX_Float64x2() D2_ +#define LAYOUT_PREFIX_Int64x2() I2_ #define LAYOUT_PREFIX_Int16x8() I8_ #define LAYOUT_PREFIX_Bool16x8() B8_ #define LAYOUT_PREFIX_Int8x16() I16_ @@ -214,6 +215,7 @@ LAYOUT_TYPE_WMS_REG4 ( Float32x4_2Int1Float1 , Float32x4, Float32x4, // Int32x4_2 LAYOUT_TYPE_WMS_REG2 ( Int32x4_2 , Int32x4, Int32x4) LAYOUT_TYPE_WMS_REG3 ( Int32x4_3 , Int32x4, Int32x4, Int32x4) +LAYOUT_TYPE_WMS_REG4 ( Int32x4_4 , Int32x4, Int32x4, Int32x4, Int32x4) LAYOUT_TYPE_WMS_REG3 ( Bool32x4_1Int32x4_2 , Bool32x4, Int32x4, Int32x4) LAYOUT_TYPE_WMS_REG4 ( Int32x4_1Bool32x4_1Int32x4_2 , Int32x4, Bool32x4, Int32x4, Int32x4) LAYOUT_TYPE_WMS_REG5 ( Int32x4_1Int4 , Int32x4, Int, Int, Int, Int) @@ -231,14 +233,18 @@ LAYOUT_TYPE_WMS_REG2 ( Int32x4_1Int8x16_1 , 
Int32x4, Int8x16) LAYOUT_TYPE_WMS_REG2 ( Int32x4_1Uint8x16_1 , Int32x4, Uint8x16) LAYOUT_TYPE_WMS_REG3 ( Int1Int32x4_1Int1 , Int, Int32x4, Int) LAYOUT_TYPE_WMS_REG4 ( Int32x4_2Int2 , Int32x4, Int32x4, Int, Int) +LAYOUT_TYPE_WMS_REG5 ( Float32x4_IntConst4 , Float32x4, IntConst, IntConst, IntConst, IntConst) + // Float64x2 +LAYOUT_TYPE_WMS_REG3 (Double1Float64x2_1Int1 , Double, Float64x2, Int) +LAYOUT_TYPE_WMS_REG4 (Float64x2_2Int1Double1 , Float64x2, Float64x2, Int, Double) +LAYOUT_TYPE_WMS_REG2 (Float64x2_2 , Float64x2, Float64x2) +LAYOUT_TYPE_WMS_REG3 (Float64x2_3 , Float64x2, Float64x2, Float64x2) +LAYOUT_TYPE_WMS_REG2 (Float64x2_1Double1 , Float64x2, Double) // Disabled for now #if 0 -LAYOUT_TYPE_WMS_REG2 ( Float64x2_2 , Float64x2, Float64x2) -LAYOUT_TYPE_WMS_REG3 ( Float64x2_3 , Float64x2, Float64x2, Float64x2) LAYOUT_TYPE_WMS_REG4 ( Float64x2_4 , Float64x2, Float64x2, Float64x2, Float64x2) LAYOUT_TYPE_WMS_REG3 ( Float64x2_1Double2 , Float64x2, Double, Double) -LAYOUT_TYPE_WMS_REG2 ( Float64x2_1Double1 , Float64x2, Double) LAYOUT_TYPE_WMS_REG3 ( Float64x2_2Double1 , Float64x2, Float64x2, Double) LAYOUT_TYPE_WMS_REG4 ( Float64x2_2Int2 , Float64x2, Float64x2, Int, Int) LAYOUT_TYPE_WMS_REG5 ( Float64x2_3Int2 , Float64x2, Float64x2, Float64x2, Int, Int) @@ -248,6 +254,15 @@ LAYOUT_TYPE_WMS_REG4 ( Float64x2_1Int32x4_1Float64x2_2 , Float64x2, Int32x4, F LAYOUT_TYPE_WMS_REG2 ( Reg1Float64x2_1 , Reg, Float64x2) #endif //0 +//Int64x2 +LAYOUT_TYPE_WMS_REG2 ( Int64x2_1Long1 , Int64x2, Long) +LAYOUT_TYPE_WMS_REG3 ( Long1Int64x2_1Int1 , Long, Int64x2, Int) +LAYOUT_TYPE_WMS_REG4 ( Int64x2_2_Int1_Long1 , Int64x2, Int64x2, Int, Long) +LAYOUT_TYPE_WMS_REG3 ( Int64x2_3 , Int64x2, Int64x2, Int64x2) +LAYOUT_TYPE_WMS_REG2 ( Int64x2_2 , Int64x2, Int64x2) +LAYOUT_TYPE_WMS_REG3 ( Int64x2_2Int1 , Int64x2, Int64x2, Int) +LAYOUT_TYPE_WMS_REG2 ( Int1Bool64x2_1 , Int, Int64x2) + // Int16x8 LAYOUT_TYPE_WMS_REG9 ( Int16x8_1Int8 , Int16x8, Int, Int, Int, Int, Int, Int, Int, Int) 
LAYOUT_TYPE_WMS_REG2 ( Reg1Int16x8_1 , Reg, Int16x8) @@ -376,8 +391,8 @@ LAYOUT_TYPE_WMS_REG2 ( Bool8x16_2 , Bool8x16, Bool8x16) LAYOUT_TYPE_WMS_REG3 ( Bool8x16_3 , Bool8x16, Bool8x16, Bool8x16) LAYOUT_TYPE_WMS_REG2 ( Reg1Bool8x16_1 , Reg, Bool8x16) - LAYOUT_TYPE_WMS ( AsmSimdTypedArr ) +LAYOUT_TYPE_WMS ( AsmShuffle ) #endif #undef LAYOUT_TYPE_DUP diff --git a/lib/Runtime/ByteCode/OpCodesSimd.h b/lib/Runtime/ByteCode/OpCodesSimd.h index 28fc1aa56c2..349275b290f 100644 --- a/lib/Runtime/ByteCode/OpCodesSimd.h +++ b/lib/Runtime/ByteCode/OpCodesSimd.h @@ -95,6 +95,9 @@ MACRO_SIMD_ASMJS_ONLY_WMS ( Simd128_I_ArgOut_I4 , Reg1Int32x4_1 MACRO_SIMD(Simd128_End, Empty, None, None) // Just a marker to indicate SIMD opcodes region MACRO_SIMD_EXTEND(Simd128_Start_Extend, Empty, None, None, 0) // Just a marker to indicate SIMD extended opcodes region +// Int32x4 +MACRO_SIMD_EXTEND_WMS ( Simd128_BitSelect_I4 , Int32x4_4 , None , OpCanCSE , 0) + // Float32x4 MACRO_SIMD_EXTEND_WMS ( Simd128_FloatsToF4 , Float32x4_1Float4 , None , OpCanCSE , 6, &Js::SIMDFloat32x4Lib::EntryInfo::Float32x4, T_F4, T_FLT, T_FLT, T_FLT, T_FLT) MACRO_SIMD_EXTEND_WMS ( Simd128_Splat_F4 , Float32x4_1Float1 , None , OpCanCSE , 3, &Js::SIMDFloat32x4Lib::EntryInfo::Splat , T_F4, T_FLT) @@ -127,36 +130,57 @@ MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_Ld_F4 , Float32x4_ MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_LdSlot_F4 , ElementSlot , None , None ) MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_StSlot_F4 , ElementSlot , None , None ) MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_Return_F4 , Float32x4_2 , None , None ) +MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_ArgOut_F4 , Reg1Float32x4_1 , None , None ) MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_I_ArgOut_F4 , Reg1Float32x4_1 , None , None ) +// Int64x2 +MACRO_SIMD_EXTEND_WMS(Simd128_Splat_I2, Int64x2_1Long1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_ExtractLane_I2, Long1Int64x2_1Int1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_ReplaceLane_I2, Int64x2_2_Int1_Long1, 
None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Add_I2, Int64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Sub_I2, Int64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Neg_I2, Int64x2_2, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_ShLtByScalar_I2, Int64x2_2Int1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_ShRtByScalar_I2, Int64x2_2Int1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_ShRtByScalar_U2, Int64x2_2Int1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_AnyTrue_B2, Int1Bool64x2_1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_AllTrue_B2, Int1Bool64x2_1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_FromFloat64x2_I2, Int64x2_2, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_FromFloat64x2_U2, Int64x2_2, None, None, 0) // Float64x2 -#if 0 //Disabling this type until the specification decides to include or not. -MACRO_SIMD_EXTEND_WMS(Simd128_DoublesToD2, Float64x2_1Double2, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Splat_D2, Float64x2_1Double1, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_FromFloat32x4_D2, Float64x2_1Float32x4_1, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_FromFloat32x4Bits_D2, Float64x2_1Float32x4_1, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_FromInt32x4_D2, Float64x2_1Int32x4_1, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_FromInt32x4Bits_D2, Float64x2_1Int32x4_1, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_Abs_D2, Float64x2_2, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_ExtractLane_D2, Double1Float64x2_1Int1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_ReplaceLane_D2, Float64x2_2Int1Double1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Eq_D2, Float64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Neq_D2, Float64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Lt_D2, Float64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_LtEq_D2, Float64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Gt_D2, Float64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_GtEq_D2, Float64x2_3, None, None, 0) 
MACRO_SIMD_EXTEND_WMS(Simd128_Neg_D2, Float64x2_2, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Abs_D2, Float64x2_2, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Min_D2, Float64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Max_D2, Float64x2_3, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Add_D2, Float64x2_3, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Sub_D2, Float64x2_3, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Mul_D2, Float64x2_3, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Div_D2, Float64x2_3, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_Sqrt_D2, Float64x2_2, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_FromInt64x2_D2, Int64x2_2, None, None, 0) //Int64x2_2 is intentional +MACRO_SIMD_EXTEND_WMS(Simd128_FromUint64x2_D2, Int64x2_2, None, None, 0) +// Float64x2 +#if 0 //Disabling this type until the specification decides to include or not. +MACRO_SIMD_EXTEND_WMS(Simd128_DoublesToD2, Float64x2_1Double2, None, None, 0) + +MACRO_SIMD_EXTEND_WMS(Simd128_FromFloat32x4_D2, Float64x2_1Float32x4_1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_FromFloat32x4Bits_D2, Float64x2_1Float32x4_1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_FromInt32x4_D2, Float64x2_1Int32x4_1, None, None, 0) +MACRO_SIMD_EXTEND_WMS(Simd128_FromInt32x4Bits_D2, Float64x2_1Int32x4_1, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Clamp_D2, Float64x2_4, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_Min_D2, Float64x2_3, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_Max_D2, Float64x2_3, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Rcp_D2, Float64x2_2, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_RcpSqrt_D2, Float64x2_2, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_Sqrt_D2, Float64x2_2, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_Lt_D2, Float64x2_3, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Select_D2, Float64x2_1Int32x4_1Float64x2_2, None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_LdSignMask_D2, Int1Float64x2_1, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_LtEq_D2, Float64x2_3, None, 
None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_Eq_D2, Float64x2_3, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_Neq_D2, Float64x2_3, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_Gt_D2, Float64x2_3, None, None, 0) -MACRO_SIMD_EXTEND_WMS(Simd128_GtEq_D2, Float64x2_3, None, None, 0) MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS(Simd128_Return_D2, Float64x2_2, None, None) MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS(Simd128_I_ArgOut_D2, Reg1Float64x2_1, None, None) @@ -198,7 +222,7 @@ MACRO_SIMD_EXTEND_WMS(Simd128_Not_B16 , Bool8x16_2 , No MACRO_SIMD_EXTEND_WMS(Simd128_Neg_U4 , Uint32x4_2 , None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Neg_U8 , Uint16x8_2 , None, None, 0) MACRO_SIMD_EXTEND_WMS(Simd128_Neg_U16 , Uint8x16_2 , None, None, 0) -MACRO_SIMD_BACKEND_ONLY(Simd128_LdC , Empty , None, OpCanCSE) // Load Simd128 const stack slot +MACRO_SIMD_EXTEND_WMS(Simd128_LdC , Float32x4_IntConst4 , None, OpCanCSE, 0) // Load Simd128 const stack slot #if 0 MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS(Simd128_Ld_D2, Float64x2_2, None, None) @@ -413,6 +437,8 @@ MACRO_SIMD_EXTEND_WMS ( Simd128_ExtractLane_U16 , Int1Uint8x16_1 MACRO_SIMD_EXTEND_WMS ( Simd128_ReplaceLane_U16 , Uint8x16_2Int2 , None , None , 0) MACRO_SIMD_EXTEND_WMS ( Simd128_Swizzle_U16 , Uint8x16_2Int16 , None , None , 0) MACRO_SIMD_EXTEND_WMS ( Simd128_Shuffle_U16 , Uint8x16_3Int16 , None , None , 0) +MACRO_SIMD_EXTEND_WMS ( Simd128_Shuffle_V8X16 , AsmShuffle , None , None , 0) + MACRO_SIMD_EXTEND_WMS ( Simd128_Splat_U16 , Uint8x16_1Int1 , None , None , 0) MACRO_SIMD_EXTEND_WMS ( Simd128_And_U16 , Uint8x16_3 , None , None , 0) MACRO_SIMD_EXTEND_WMS ( Simd128_Or_U16 , Uint8x16_3 , None , None , 0) @@ -487,8 +513,7 @@ MACRO_SIMD_EXTEND_WMS ( Simd128_AllTrue_B16 , Int1Bool8x16_1 MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_Ld_B16 , Bool8x16_2 , None , None ) MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_LdSlot_B16 , ElementSlot , None , None ) MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS( Simd128_StSlot_B16 , ElementSlot , None , None ) - -MACRO_SIMD_EXTEND ( Simd128_End_Extend 
, Empty , None , None , 0) // Just a marker to indicate SIMD opcodes region +MACRO_SIMD_EXTEND ( Simd128_End_Extend , Empty , None , None , 0) // Just a marker to indicate SIMD opcodes region #undef T_F4 #undef T_I4 #undef T_INT diff --git a/lib/Runtime/ByteCode/OpLayoutsAsmJs.h b/lib/Runtime/ByteCode/OpLayoutsAsmJs.h index 9a928b5f797..e7f279b2f57 100644 --- a/lib/Runtime/ByteCode/OpLayoutsAsmJs.h +++ b/lib/Runtime/ByteCode/OpLayoutsAsmJs.h @@ -275,6 +275,15 @@ namespace Js typename SizePolicy::RegSlotType R18; }; + template + struct OpLayoutT_AsmShuffle + { + uint8 INDICES[16]; + typename SizePolicy::RegSlotType R0; + typename SizePolicy::RegSlotType R1; + typename SizePolicy::RegSlotType R2; + }; + #define RegLayoutType typename SizePolicy::RegSlotType #define IntLayoutType typename SizePolicy::RegSlotType #define LongLayoutType typename SizePolicy::RegSlotType @@ -288,6 +297,7 @@ namespace Js #define Bool32x4LayoutType typename SizePolicy::RegSlotType #define Int32x4LayoutType typename SizePolicy::RegSlotType #define Float64x2LayoutType typename SizePolicy::RegSlotType +#define Int64x2LayoutType typename SizePolicy::RegSlotType #define Int16x8LayoutType typename SizePolicy::RegSlotType #define Bool16x8LayoutType typename SizePolicy::RegSlotType #define Int8x16LayoutType typename SizePolicy::RegSlotType @@ -297,6 +307,7 @@ namespace Js #define Uint8x16LayoutType typename SizePolicy::RegSlotType #define LAYOUT_FIELDS_HELPER(x, y) x ## y #define LAYOUT_FIELDS_DEF(x, y) LAYOUT_FIELDS_HELPER(x, y) + #define LAYOUT_TYPE_WMS_REG2(layout, t0, t1) \ template struct OpLayoutT_##layout\ {\ @@ -478,6 +489,7 @@ namespace Js #undef Uint32x4LayoutType #undef Uint16x8LayoutType #undef Uint8x16LayoutType +#undef Int64x2LayoutType template struct OpLayoutT_AsmUnsigned1 @@ -528,6 +540,7 @@ namespace Js typename SizePolicy::RegSlotType Value; ArrayBufferView::ViewType ViewType; int8 DataWidth; // # of bytes to load/store + uint32 Offset; //WASM.SIMD }; // Generate the multi size 
layout type defs diff --git a/lib/Runtime/Language/AsmJsTypes.cpp b/lib/Runtime/Language/AsmJsTypes.cpp index a99f463d75e..ddab5ae46fb 100644 --- a/lib/Runtime/Language/AsmJsTypes.cpp +++ b/lib/Runtime/Language/AsmJsTypes.cpp @@ -48,6 +48,7 @@ namespace Js case Intish: return _u("intish"); case Void: return _u("void"); case Int32x4: return _u("SIMD.Int32x4"); + case Int64x2: return _u("SIMD.Int64x2"); case Bool32x4: return _u("SIMD.Bool32x4"); case Bool16x8: return _u("SIMD.Bool16x8"); case Bool8x16: return _u("SIMD.Bool8x16"); @@ -103,6 +104,10 @@ namespace Js { return which_ == Float64x2; } + bool AsmJsType::isSIMDInt64x2() const + { + return which_ == Int64x2; + } bool AsmJsType::isSIMDUint32x4() const { diff --git a/lib/Runtime/Language/AsmJsTypes.h b/lib/Runtime/Language/AsmJsTypes.h index 77ae4b13c8f..9827cd8d029 100644 --- a/lib/Runtime/Language/AsmJsTypes.h +++ b/lib/Runtime/Language/AsmJsTypes.h @@ -143,7 +143,8 @@ namespace Js Bool16x8, Bool8x16, Float32x4, - Float64x2 + Float64x2, + Int64x2 }; private: @@ -185,6 +186,7 @@ namespace Js bool isSIMDUint32x4() const; bool isSIMDUint16x8() const; bool isSIMDUint8x16() const; + bool isSIMDInt64x2() const; AsmJsRetType toRetType() const; }; @@ -209,6 +211,7 @@ namespace Js Bool8x16 = AsmJsType::Bool8x16, Float32x4 = AsmJsType::Float32x4, Float64x2 = AsmJsType::Float64x2, + Int64x2 = AsmJsType::Int64x2, Int16x8 = AsmJsType::Int16x8, Int8x16 = AsmJsType::Int8x16, Uint32x4 = AsmJsType::Uint32x4, @@ -271,7 +274,8 @@ namespace Js Int8x16 = AsmJsType::Int8x16, Uint32x4 = AsmJsType::Uint32x4, Uint16x8 = AsmJsType::Uint16x8, - Uint8x16 = AsmJsType::Uint8x16 + Uint8x16 = AsmJsType::Uint8x16, + Int64x2 = AsmJsType::Int64x2 }; private: diff --git a/lib/Runtime/Language/CMakeLists.txt b/lib/Runtime/Language/CMakeLists.txt index 956f44bd299..a19d65de49f 100644 --- a/lib/Runtime/Language/CMakeLists.txt +++ b/lib/Runtime/Language/CMakeLists.txt @@ -41,6 +41,7 @@ set(CRL_SOURCE_FILES ${CRL_SOURCE_FILES} 
SimdBool8x16OperationX86X64.cpp SimdFloat32x4Operation.cpp SimdFloat32x4OperationX86X64.cpp + SimdInt64x2Operation.cpp SimdFloat64x2Operation.cpp SimdFloat64x2OperationX86X64.cpp SimdInt16x8Operation.cpp diff --git a/lib/Runtime/Language/Chakra.Runtime.Language.vcxproj b/lib/Runtime/Language/Chakra.Runtime.Language.vcxproj index a24fca0c787..1964cf3d244 100644 --- a/lib/Runtime/Language/Chakra.Runtime.Language.vcxproj +++ b/lib/Runtime/Language/Chakra.Runtime.Language.vcxproj @@ -1,4 +1,4 @@ - + @@ -94,6 +94,7 @@ + true @@ -239,6 +240,7 @@ + diff --git a/lib/Runtime/Language/InterpreterHandlerAsmJs.inl b/lib/Runtime/Language/InterpreterHandlerAsmJs.inl index 687a9d6c2d4..998a3d311c3 100755 --- a/lib/Runtime/Language/InterpreterHandlerAsmJs.inl +++ b/lib/Runtime/Language/InterpreterHandlerAsmJs.inl @@ -36,9 +36,8 @@ EXDEF2 (NOPASMJS , InvalidOpCode, Empty DEF2_WMS( R1toD1Mem , Conv_VTD , JavascriptConversion::ToNumber ) // convert var to double DEF2_WMS( R1toF1Mem , Conv_VTF , JavascriptConversion::ToNumber ) // convert var to float DEF2_WMS( R1toI1Mem , Conv_VTI , JavascriptMath::ToInt32 ) // convert var to int - DEF3_WMS( CUSTOM_ASMJS , Conv_VTL , OP_InvalidWasmTypeConversion , Long1Reg1 ) // convert var to int64 - DEF3_WMS( CUSTOM_ASMJS , ArgOut_Long , OP_InvalidWasmTypeConversion , Reg1Long1 ) // convert int64 to Var - + DEF3_WMS( CUSTOM_ASMJS , Conv_VTL , (OP_InvalidWasmTypeConversion) , Long1Reg1 ) // convert var to int64 + DEF3_WMS( CUSTOM_ASMJS , ArgOut_Long , (OP_InvalidWasmTypeConversion) , Reg1Long1 ) // convert int64 to Var DEF3_WMS( CUSTOM_ASMJS , LdArr_Func , OP_LdArrFunc , ElementSlot ) DEF3_WMS( CUSTOM_ASMJS , LdArr_WasmFunc,OP_LdArrWasmFunc , ElementSlot ) DEF3_WMS( CUSTOM_ASMJS , CheckSignature,OP_CheckSignature , Reg1IntConst1 ) @@ -301,7 +300,9 @@ EXDEF4_WMS(TEMPLATE_ASMJS , Simd128_StSlot_F4 , OP_StSlotPrimitive EXDEF2_WMS( SIMD_F4_1toF4_1 , Simd128_Return_F4 , (AsmJsSIMDValue) ) DEF2_WMS( SIMD_I4_1toI4_1 , Simd128_Return_I4 , (AsmJsSIMDValue) ) 
+EXDEF2_WMS(SIMD_D1toD2_1 , Simd128_Splat_D2 , Js::SIMDFloat64x2Operation::OpSplat ) EXDEF2_WMS( SIMD_F1toF4_1 , Simd128_Splat_F4 ,Js::SIMDFloat32x4Operation::OpSplat ) +EXDEF2_WMS( SIMD_L1toI2_1 , Simd128_Splat_I2 ,Js::SIMDInt64x2Operation::OpSplat ) DEF2_WMS( SIMD_I1toI4_1 , Simd128_Splat_I4 ,Js::SIMDInt32x4Operation::OpSplat ) //EXDEF2_WMS( SIMD_D2_1toF4_1 , Simd128_FromFloat64x2_F4 ,SIMDFloat32x4Operation::OpFromFloat64x2 ) @@ -316,6 +317,7 @@ EXDEF2_WMS( SIMD_I4_1toF4_1 , Simd128_FromInt32x4Bits_F4 ,Js::SIMDUtils::FromS EXDEF2_WMS( SIMD_F4_1toF4_1 , Simd128_Abs_F4 ,SIMDFloat32x4Operation::OpAbs ) +EXDEF2_WMS( SIMD_I2_1toI2_1 , Simd128_Neg_I2 ,SIMDInt64x2Operation::OpNeg ) EXDEF2_WMS( SIMD_F4_1toF4_1 , Simd128_Neg_F4 ,SIMDFloat32x4Operation::OpNeg ) DEF2_WMS( SIMD_I4_1toI4_1 , Simd128_Neg_I4 ,SIMDInt32x4Operation::OpNeg ) @@ -328,21 +330,30 @@ EXDEF2_WMS( SIMD_B4_1toB4_1 , Simd128_Not_B4 , Js::SIMDInt32x4Oper EXDEF2_WMS( SIMD_B8_1toB8_1 , Simd128_Not_B8 , Js::SIMDInt32x4Operation::OpNot ) EXDEF2_WMS( SIMD_B16_1toB16_1, Simd128_Not_B16 , Js::SIMDInt32x4Operation::OpNot ) -EXDEF2_WMS( SIMD_B4_1toI1, Simd128_AllTrue_B4 , Js::SIMDBool32x4Operation::OpAllTrue ) -EXDEF2_WMS( SIMD_B8_1toI1, Simd128_AllTrue_B8 , Js::SIMDBool32x4Operation::OpAllTrue ) -EXDEF2_WMS( SIMD_B16_1toI1, Simd128_AllTrue_B16 , Js::SIMDBool32x4Operation::OpAllTrue ) +EXDEF2_WMS( SIMD_B2_1toI1, Simd128_AllTrue_B2 , Js::SIMDBool32x4Operation::OpAllTrue ) +EXDEF2_WMS( SIMD_B4_1toI1, Simd128_AllTrue_B4 , Js::SIMDBool32x4Operation::OpAllTrue ) +EXDEF2_WMS( SIMD_B8_1toI1, Simd128_AllTrue_B8 , Js::SIMDBool32x4Operation::OpAllTrue ) +EXDEF2_WMS( SIMD_B16_1toI1, Simd128_AllTrue_B16 , Js::SIMDBool32x4Operation::OpAllTrue ) -EXDEF2_WMS( SIMD_B4_1toI1, Simd128_AnyTrue_B4 , Js::SIMDBool32x4Operation::OpAnyTrue ) -EXDEF2_WMS( SIMD_B8_1toI1, Simd128_AnyTrue_B8 , Js::SIMDBool32x4Operation::OpAnyTrue ) -EXDEF2_WMS( SIMD_B16_1toI1, Simd128_AnyTrue_B16 , Js::SIMDBool32x4Operation::OpAnyTrue ) +EXDEF2_WMS( 
SIMD_B2_1toI1, Simd128_AnyTrue_B2 , Js::SIMDBool32x4Operation::OpAnyTrue ) +EXDEF2_WMS( SIMD_B4_1toI1, Simd128_AnyTrue_B4 , Js::SIMDBool32x4Operation::OpAnyTrue ) +EXDEF2_WMS( SIMD_B8_1toI1, Simd128_AnyTrue_B8 , Js::SIMDBool32x4Operation::OpAnyTrue ) +EXDEF2_WMS( SIMD_B16_1toI1, Simd128_AnyTrue_B16 , Js::SIMDBool32x4Operation::OpAnyTrue ) DEF2_WMS(SIMD_I4_1I1toI4_1 , Simd128_ShLtByScalar_I4 , Js::SIMDInt32x4Operation::OpShiftLeftByScalar) +EXDEF2_WMS(SIMD_I2_1I1toI2_1 , Simd128_ShLtByScalar_I2 , Js::SIMDInt64x2Operation::OpShiftLeftByScalar) DEF2_WMS(SIMD_I4_1I1toI4_1 , Simd128_ShRtByScalar_I4 , Js::SIMDInt32x4Operation::OpShiftRightByScalar) +EXDEF2_WMS(SIMD_I2_1I1toI2_1 , Simd128_ShRtByScalar_I2 , Js::SIMDInt64x2Operation::OpShiftRightByScalar) + +// ternary ops +EXDEF2_WMS( SIMD_I4_3toI4_1 , Simd128_BitSelect_I4 , SIMDUtils::SIMD128BitSelect) // binary ops +EXDEF2_WMS( SIMD_I2_2toI2_1 , Simd128_Add_I2 , Js::SIMDInt64x2Operation::OpAdd ) EXDEF2_WMS( SIMD_F4_2toF4_1 , Simd128_Add_F4 , Js::SIMDFloat32x4Operation::OpAdd ) DEF2_WMS( SIMD_I4_2toI4_1 , Simd128_Add_I4 , Js::SIMDInt32x4Operation::OpAdd ) +EXDEF2_WMS( SIMD_I2_2toI2_1 , Simd128_Sub_I2 , Js::SIMDInt64x2Operation::OpSub ) EXDEF2_WMS( SIMD_F4_2toF4_1 , Simd128_Sub_F4 , Js::SIMDFloat32x4Operation::OpSub ) DEF2_WMS( SIMD_I4_2toI4_1 , Simd128_Sub_I4 , Js::SIMDInt32x4Operation::OpSub ) @@ -395,6 +406,7 @@ EXDEF2_WMS( SIMD_B8_1U8_2toU8_1 , Simd128_Select_U8 , Js::SIMDInt32x4Opera EXDEF2_WMS( SIMD_B16_1U16_2toU16_1 , Simd128_Select_U16 , Js::SIMDInt32x4Operation::OpSelect ) // args out, copy value to outParams +EXDEF3_WMS ( CUSTOM_ASMJS , Simd128_ArgOut_F4 , (OP_InvalidWasmTypeConversion) , Reg1Float32x4_1) EXDEF2_WMS ( SIMD_F4_1toR1Mem , Simd128_I_ArgOut_F4 , OP_I_SetOutAsmSimd ) DEF2_WMS ( SIMD_I4_1toR1Mem , Simd128_I_ArgOut_I4 , OP_I_SetOutAsmSimd ) @@ -459,15 +471,19 @@ EXDEF2_WMS ( SIMD_I16_1toR1Mem , Simd128_I_ArgOut_I16 , OP_I_SetOutAsmSimd EXDEF2_WMS ( SIMD_I16_1I1toI1 , Simd128_ExtractLane_I16 , 
SIMDUtils::SIMD128InnerExtractLaneI16 ) EXDEF2_WMS ( SIMD_I16_1I2toI16_1 , Simd128_ReplaceLane_I16 , SIMDUtils::SIMD128InnerReplaceLaneI16 ) EXDEF2_WMS ( SIMD_I4_1I1toI1 , Simd128_ExtractLane_I4 , SIMDUtils::SIMD128InnerExtractLaneI4 ) +EXDEF2_WMS ( SIMD_I2_1I1toL1 , Simd128_ExtractLane_I2 , SIMDUtils::SIMD128InnerExtractLaneI2 ) +EXDEF2_WMS ( SIMD_D2_1I1toD1 , Simd128_ExtractLane_D2 , SIMDUtils::SIMD128InnerExtractLaneD2 ) +EXDEF2_WMS ( SIMD_D2_1I1D1toD2_1 , Simd128_ReplaceLane_D2 , SIMDUtils::SIMD128InnerReplaceLaneD2 ) EXDEF2_WMS ( SIMD_F4_1I1toF1 , Simd128_ExtractLane_F4 , SIMDUtils::SIMD128InnerExtractLaneF4 ) EXDEF2_WMS ( SIMD_I8_1I1toI1 , Simd128_ExtractLane_I8 , SIMDUtils::SIMD128InnerExtractLaneI8 ) EXDEF2_WMS ( SIMD_U4_1I1toI1 , Simd128_ExtractLane_U4 , SIMDUtils::SIMD128InnerExtractLaneI4 ) EXDEF2_WMS ( SIMD_U8_1I1toI1 , Simd128_ExtractLane_U8 , SIMDUtils::SIMD128InnerExtractLaneI8 ) EXDEF2_WMS ( SIMD_U16_1I1toI1 , Simd128_ExtractLane_U16 , SIMDUtils::SIMD128InnerExtractLaneI16 ) -EXDEF2_WMS ( SIMD_B4_1I1toI1 , Simd128_ExtractLane_B4 , SIMDUtils::SIMD128InnerExtractLaneI4 ) -EXDEF2_WMS ( SIMD_B8_1I1toI1 , Simd128_ExtractLane_B8 , SIMDUtils::SIMD128InnerExtractLaneI8 ) -EXDEF2_WMS ( SIMD_B16_1I1toI1 , Simd128_ExtractLane_B16 , SIMDUtils::SIMD128InnerExtractLaneI16 ) +EXDEF2_WMS ( SIMD_B4_1I1toI1 , Simd128_ExtractLane_B4 , SIMDUtils::SIMD128InnerExtractLaneB4 ) +EXDEF2_WMS ( SIMD_B8_1I1toI1 , Simd128_ExtractLane_B8 , SIMDUtils::SIMD128InnerExtractLaneB8 ) +EXDEF2_WMS ( SIMD_B16_1I1toI1 , Simd128_ExtractLane_B16 , SIMDUtils::SIMD128InnerExtractLaneB16 ) +EXDEF2_WMS ( SIMD_I2_1I1L1toI2_1 , Simd128_ReplaceLane_I2 , SIMDUtils::SIMD128InnerReplaceLaneI2 ) EXDEF2_WMS ( SIMD_I4_1I2toI4_1 , Simd128_ReplaceLane_I4 , SIMDUtils::SIMD128InnerReplaceLaneI4 ) EXDEF2_WMS ( SIMD_F4_1I1F1toF4_1 , Simd128_ReplaceLane_F4 , SIMDUtils::SIMD128InnerReplaceLaneF4 ) EXDEF2_WMS ( SIMD_I8_1I2toI8_1 , Simd128_ReplaceLane_I8 , SIMDUtils::SIMD128InnerReplaceLaneI8 ) @@ -583,6 +599,7 
@@ EXDEF2_WMS(SIMD_I16_1I1toI16_1 , Simd128_ShRtByScalar_I16 , Js::SIMDInt8x16Op EXDEF2_WMS(SIMD_U4_1I1toU4_1 , Simd128_ShLtByScalar_U4 , Js::SIMDInt32x4Operation::OpShiftLeftByScalar ) EXDEF2_WMS(SIMD_U4_1I1toU4_1 , Simd128_ShRtByScalar_U4 , Js::SIMDUint32x4Operation::OpShiftRightByScalar ) +EXDEF2_WMS(SIMD_I2_1I1toI2_1 , Simd128_ShRtByScalar_U2 , Js::SIMDInt64x2Operation::OpShiftRightByScalarU) EXDEF2_WMS(SIMD_U8_1I1toU8_1 , Simd128_ShLtByScalar_U8 , Js::SIMDInt16x8Operation::OpShiftLeftByScalar ) EXDEF2_WMS(SIMD_U8_1I1toU8_1 , Simd128_ShRtByScalar_U8 , Js::SIMDUint16x8Operation::OpShiftRightByScalar ) @@ -629,6 +646,7 @@ EXDEF2_WMS( SIMD_U8_1toI16_1 , Simd128_FromUint16x8Bits_I16 , Js::SIMDUtils::F EXDEF2_WMS( SIMD_U16_1toI16_1, Simd128_FromUint8x16Bits_I16 , Js::SIMDUtils::FromSimdBits ) EXDEF3_WMS( CUSTOM_ASMJS , Simd128_FromFloat32x4_U4 , OP_SimdUint32x4FromFloat32x4 , Uint32x4_1Float32x4_1) +EXDEF3_WMS( CUSTOM_ASMJS , Simd128_LdC , OP_WasmSimdConst , Float32x4_IntConst4) EXDEF2_WMS( SIMD_F4_1toU4_1 , Simd128_FromFloat32x4Bits_U4 , Js::SIMDUtils::FromSimdBits ) EXDEF2_WMS( SIMD_I4_1toU4_1 , Simd128_FromInt32x4Bits_U4 , Js::SIMDUtils::FromSimdBits ) EXDEF2_WMS( SIMD_I8_1toU4_1 , Simd128_FromInt16x8Bits_U4 , Js::SIMDUtils::FromSimdBits ) @@ -689,22 +707,10 @@ EXDEF2_WMS( SIMD_U16_2toB16_1 , Simd128_Gt_U16 , Js::SIMDUint8x16Op EXDEF2_WMS( SIMD_U4_1toU4_1 , Simd128_Neg_U4 , SIMDInt32x4Operation::OpNeg ) EXDEF2_WMS( SIMD_U8_1toU8_1 , Simd128_Neg_U8 , SIMDInt16x8Operation::OpNeg ) EXDEF2_WMS( SIMD_U16_1toU16_1 , Simd128_Neg_U16 , SIMDInt8x16Operation::OpNeg ) +EXDEF3_WMS( CUSTOM_ASMJS , Simd128_Shuffle_V8X16 , OP_SimdShuffleV8X16 , AsmShuffle) -#if 0 -EXDEF2_WMS(SIMD_D2toD2_1, Simd128_DoublesToD2, SIMDFloat64x2Operation::OpFloat64x2) -EXDEF4_WMS(TEMPLATE_ASMJS, Simd128_LdSlot_D2, OP_LdSlotPrimitive, ElementSlot, AsmJsSIMDValue) -EXDEF4_WMS(TEMPLATE_ASMJS, Simd128_StSlot_D2, OP_StSlotPrimitive, ElementSlot, AsmJsSIMDValue) -EXDEF2_WMS(SIMD_D2_1toD2_1, 
Simd128_Return_D2, (AsmJsSIMDValue)) -EXDEF2_WMS(SIMD_D1toD2_1, Simd128_Splat_D2, Js::SIMDFloat64x2Operation::OpSplat) -EXDEF2_WMS(SIMD_F4_1toD2_1, Simd128_FromFloat32x4_D2, SIMDFloat64x2Operation::OpFromFloat32x4) -EXDEF2_WMS(SIMD_F4_1toD2_1, Simd128_FromFloat32x4Bits_D2, Js::FromSimdBits) -EXDEF2_WMS(SIMD_I4_1toD2_1, Simd128_FromInt32x4_D2, SIMDFloat64x2Operation::OpFromInt32x4) -EXDEF2_WMS(SIMD_I4_1toD2_1, Simd128_FromInt32x4Bits_D2, Js::FromSimdBits) EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_Abs_D2, SIMDFloat64x2Operation::OpAbs) EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_Neg_D2, SIMDFloat64x2Operation::OpNeg) -EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_Rcp_D2, SIMDFloat64x2Operation::OpReciprocal) -EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_RcpSqrt_D2, SIMDFloat64x2Operation::OpReciprocalSqrt) -EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_Sqrt_D2, SIMDFloat64x2Operation::OpSqrt) EXDEF2_WMS(SIMD_D2_2toD2_1, Simd128_Add_D2, Js::SIMDFloat64x2Operation::OpAdd) EXDEF2_WMS(SIMD_D2_2toD2_1, Simd128_Sub_D2, Js::SIMDFloat64x2Operation::OpSub) EXDEF2_WMS(SIMD_D2_2toD2_1, Simd128_Mul_D2, Js::SIMDFloat64x2Operation::OpMul) @@ -717,6 +723,23 @@ EXDEF2_WMS(SIMD_D2_2toD2_1, Simd128_Eq_D2, Js::SIMDFloat64x2Operation::OpEqual) EXDEF2_WMS(SIMD_D2_2toD2_1, Simd128_Neq_D2, Js::SIMDFloat64x2Operation::OpNotEqual) EXDEF2_WMS(SIMD_D2_2toD2_1, Simd128_GtEq_D2, Js::SIMDFloat64x2Operation::OpGreaterThanOrEqual) EXDEF2_WMS(SIMD_D2_2toD2_1, Simd128_Gt_D2, Js::SIMDFloat64x2Operation::OpGreaterThan) +EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_Sqrt_D2, SIMDFloat64x2Operation::OpSqrt) +EXDEF2_WMS(SIMD_I2_1toI2_P, Simd128_FromInt64x2_D2, SIMDFloat64x2Operation::OpConv) +EXDEF2_WMS(SIMD_I2_1toI2_P, Simd128_FromUint64x2_D2, SIMDFloat64x2Operation::OpConv) +EXDEF2_WMS(SIMD_I2_1toI2_P, Simd128_FromFloat64x2_I2, SIMDInt64x2Operation::OpTrunc) +EXDEF2_WMS(SIMD_I2_1toI2_P, Simd128_FromFloat64x2_U2, SIMDInt64x2Operation::OpTrunc) + +#if 0 +EXDEF2_WMS(SIMD_D2toD2_1, Simd128_DoublesToD2, SIMDFloat64x2Operation::OpFloat64x2) 
+EXDEF4_WMS(TEMPLATE_ASMJS, Simd128_LdSlot_D2, OP_LdSlotPrimitive, ElementSlot, AsmJsSIMDValue) +EXDEF4_WMS(TEMPLATE_ASMJS, Simd128_StSlot_D2, OP_StSlotPrimitive, ElementSlot, AsmJsSIMDValue) +EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_Return_D2, (AsmJsSIMDValue)) +EXDEF2_WMS(SIMD_F4_1toD2_1, Simd128_FromFloat32x4_D2, SIMDFloat64x2Operation::OpFromFloat32x4) +EXDEF2_WMS(SIMD_F4_1toD2_1, Simd128_FromFloat32x4Bits_D2, Js::FromSimdBits) +EXDEF2_WMS(SIMD_I4_1toD2_1, Simd128_FromInt32x4_D2, SIMDFloat64x2Operation::OpFromInt32x4) +EXDEF2_WMS(SIMD_I4_1toD2_1, Simd128_FromInt32x4Bits_D2, Js::FromSimdBits) +EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_Rcp_D2, SIMDFloat64x2Operation::OpReciprocal) +EXDEF2_WMS(SIMD_D2_1toD2_1, Simd128_RcpSqrt_D2, SIMDFloat64x2Operation::OpReciprocalSqrt) EXDEF2_WMS(SIMD_I4_1D2_2toD2_1, Simd128_Select_D2, Js::SIMDFloat64x2Operation::OpSelect) EXDEF2_WMS(SIMD_D2_3toD2_1, Simd128_Clamp_D2, Js::SIMDFloat64x2Operation::OpClamp) EXDEF2_WMS(SIMD_D2_1toI1, Simd128_LdSignMask_D2, Js::SIMDFloat64x2Operation::OpGetSignMask) diff --git a/lib/Runtime/Language/InterpreterProcessOpCodeAsmJs.h b/lib/Runtime/Language/InterpreterProcessOpCodeAsmJs.h index 188c7302131..d239a33ba7c 100755 --- a/lib/Runtime/Language/InterpreterProcessOpCodeAsmJs.h +++ b/lib/Runtime/Language/InterpreterProcessOpCodeAsmJs.h @@ -42,7 +42,6 @@ #define PROCESS_FUNCtoA1Mem(name, func) PROCESS_FUNCtoA1Mem_COMMON(name, func,) - #define PROCESS_CUSTOM_ASMJS_COMMON(name, func, layout, suffix) \ case OpCodeAsmJs::name: \ { \ @@ -819,6 +818,84 @@ if (switchProfileMode) \ } #define PROCESS_SIMD_I1toU16_1(name, func) PROCESS_SIMD_I1toU16_1_COMMON(name, func,) +#define PROCESS_SIMD_B2_1toI1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Int1Bool64x2_1, suffix); \ + SetRegRawInt(playout->I0, func(GetRegRawSimd(playout->I2_1))); \ + break; \ + } +#define PROCESS_SIMD_B2_1toI1(name, func) PROCESS_SIMD_B2_1toI1_COMMON(name, func,) + +#define 
PROCESS_SIMD_L1toI2_1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Int64x2_1Long1, suffix); \ + SetRegRawSimd(playout->I2_0, func(GetRegRawInt64(playout->L1))); \ + break; \ + } +#define PROCESS_SIMD_L1toI2_1(name, func) PROCESS_SIMD_L1toI2_1_COMMON(name, func,) + +#define PROCESS_SIMD_I2_1I1toL1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Long1Int64x2_1Int1, suffix); \ + SetRegRawInt64(playout->L0, func(GetRegRawSimd(playout->I2_1), GetRegRawInt(playout->I2))); \ + break; \ + } +#define PROCESS_SIMD_I2_1I1toL1(name, func) PROCESS_SIMD_I2_1I1toL1_COMMON(name, func,) + +#define PROCESS_SIMD_I2_1I1L1toI2_1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Int64x2_2_Int1_Long1, suffix); \ + SetRegRawSimd(playout->I2_0, func(GetRegRawSimd(playout->I2_1), GetRegRawInt(playout->I2), GetRegRawInt64(playout->L3))); \ + break; \ + } +#define PROCESS_SIMD_I2_1I1L1toI2_1(name, func) PROCESS_SIMD_I2_1I1L1toI2_1_COMMON(name, func,) + +#define PROCESS_SIMD_I2_2toI2_1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Int64x2_3, suffix); \ + SetRegRawSimd(playout->I2_0, func(GetRegRawSimd(playout->I2_1), GetRegRawSimd(playout->I2_2))); \ + break; \ + } +#define PROCESS_SIMD_I2_2toI2_1(name, func) PROCESS_SIMD_I2_2toI2_1_COMMON(name, func,) + +#define PROCESS_SIMD_I2_1toI2_1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Int64x2_2, suffix); \ + SetRegRawSimd(playout->I2_0, func(GetRegRawSimd(playout->I2_1))); \ + break; \ + } +#define PROCESS_SIMD_I2_1toI2_1 (name, func) PROCESS_SIMD_I2_1toI2_1_COMMON(name, func,) + +#define PROCESS_SIMD_I2_1toI2_P_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Int64x2_2, suffix); \ + SIMDValue result {0}; \ + SIMDValue src = GetRegRawSimd(playout->I2_1); 
\ + func(&result, &src); \ + SetRegRawSimd(playout->I2_0, result); \ + break; \ + } +#define PROCESS_SIMD_I2_1toI2_P(name, func) PROCESS_SIMD_I2_1toI2_P_COMMON(name, func,) + +#define PROCESS_SIMD_I2_1I1toI2_1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Int64x2_2Int1, suffix); \ + SIMDValue result {0}; \ + SIMDValue src = GetRegRawSimd(playout->I2_1); \ + func(&result, &src, GetRegRawInt(playout->I2)); \ + SetRegRawSimd(playout->I2_0, result); \ + break; \ + } +#define PROCESS_SIMD_I2_1I1toI2_1(name, func) PROCESS_SIMD_I2_1I1toI2_1_COMMON(name, func,) + #define PROCESS_SIMD_F1toF4_1_COMMON(name, func, suffix) \ case OpCodeAsmJs::name: \ { \ @@ -1412,6 +1489,15 @@ if (switchProfileMode) \ } #define PROCESS_SIMD_B16_1toI1(name, func, suffix) PROCESS_SIMD_B16_1toI1_COMMON(name, func, suffix) +#define PROCESS_SIMD_D2_1I1D1toD2_1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Float64x2_2Int1Double1, suffix); \ + SetRegRawSimd(playout->D2_0, func(GetRegRawSimd(playout->D2_1), GetRegRawInt(playout->I2), GetRegRawDouble(playout->D3))); \ + break; \ + } +#define PROCESS_SIMD_D2_1I1D1toD2_1_1(name, func) PROCESS_SIMD_D2_1I1D1toD2_1_COMMON(name, func, ) + #define PROCESS_SIMD_F4_1toF4_1_COMMON(name, func, suffix) \ case OpCodeAsmJs::name: \ { \ @@ -1531,6 +1617,15 @@ if (switchProfileMode) \ } #define PROCESS_SIMD_F4_2toB4_1(name, func) PROCESS_SIMD_F4_2toB4_1_COMMON(name, func,) +#define PROCESS_SIMD_I4_3toI4_1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Int32x4_4, suffix); \ + SetRegRawSimd(playout->I4_0, func(GetRegRawSimd(playout->I4_1), GetRegRawSimd(playout->I4_2), GetRegRawSimd(playout->I4_3))); \ + break; \ + } +#define PROCESS_SIMD_I4_3toI4_1(name, func) PROCESS_SIMD_I4_3toI4_1_COMMON(name, func,) + #define PROCESS_SIMD_I4_2toI4_1_COMMON(name, func, suffix) \ case OpCodeAsmJs::name: \ { \ @@ -2175,6 +2270,15 @@ if 
(switchProfileMode) \ } #define PROCESS_SIMD_I16_1I1toI16_1(name, func) PROCESS_SIMD_I16_1I1toI16_1_COMMON(name, func,) +#define PROCESS_SIMD_D2_1I1toD1_COMMON(name, func, suffix) \ + case OpCodeAsmJs::name: \ + { \ + PROCESS_READ_LAYOUT_ASMJS(name, Double1Float64x2_1Int1, suffix); \ + SetRegRawDouble(playout->D0, func(GetRegRawSimd(playout->D2_1), GetRegRawInt(playout->I2))); \ + break; \ + } +#define PROCESS_SIMD_D2_1I1toD1(name, func) PROCESS_SIMD_D2_1I1toD1_COMMON(name, func, ) + #define PROCESS_SIMD_U4_1I1toU4_1_COMMON(name, func, suffix) \ case OpCodeAsmJs::name: \ { \ diff --git a/lib/Runtime/Language/InterpreterStackFrame.cpp b/lib/Runtime/Language/InterpreterStackFrame.cpp index d50034e918c..c7bf3589f60 100644 --- a/lib/Runtime/Language/InterpreterStackFrame.cpp +++ b/lib/Runtime/Language/InterpreterStackFrame.cpp @@ -14,7 +14,7 @@ #include "Language/InterpreterStackFrame.h" #include "Library/JavascriptGeneratorFunction.h" #include "Library/ForInObjectEnumerator.h" - +#include "../../WasmReader/WasmParseTree.h" ///---------------------------------------------------------------------------- /// /// macros PROCESS_INtoOUT @@ -2077,13 +2077,20 @@ namespace Js case AsmJsRetType::Uint32x4: case AsmJsRetType::Uint16x8: case AsmJsRetType::Uint8x16: -#ifdef ENABLE_SIMDJS + +#if defined(ENABLE_WASM_SIMD) || defined(ENABLE_SIMDJS) + +#ifdef ENABLE_WASM_SIMD + if (CONFIG_FLAG(WasmSimd)) +#elif ENABLE_SIMDJS if (function->GetScriptContext()->GetConfig()->IsSimdjsEnabled()) +#endif { *(AsmJsSIMDValue*)retDst = asmJsReturn.simd; break; } #endif + Assert(UNREACHED); // double return case AsmJsRetType::Double: @@ -2266,13 +2273,18 @@ namespace Js *(AsmJsSIMDValue*)(&(m_outParams[outRegisterID])) = val; } - template + template void InterpreterStackFrame::OP_InvalidWasmTypeConversion(...) { - // Right now the only invalid wasm type conversion is with int64 - const char16* fromType = toJs ? _u("int64") : _u("Javascript Variable"); - const char16* toType = toJs ? 
_u("Javascript Variable") : _u("int64"); +#ifdef ENABLE_WASM + CompileAssert(type < Wasm::WasmTypes::Limit); + const char16* fromType = toJs ? Wasm::WasmTypes::GetTypeName(static_cast(type)) : _u("Javascript Variable"); + const char16* toType = toJs ? _u("Javascript Variable") : Wasm::WasmTypes::GetTypeName(static_cast(type)); JavascriptError::ThrowTypeErrorVar(scriptContext, WASMERR_InvalidTypeConversion, fromType, toType); +#else + Assert(UNREACHED); //shouldn't get there + JavascriptError::ThrowTypeErrorVar(scriptContext, WASMERR_InvalidTypeConversion, _u("unknown"), _u("unknown")); //throw for a release build +#endif } // This will be called in the beginning of the try_finally. @@ -2515,6 +2527,7 @@ namespace Js Field(Var)* localFunctionImports = moduleMemoryPtr + moduleMemory.mFFIOffset ; Field(Var)* localModuleFunctions = moduleMemoryPtr + moduleMemory.mFuncOffset ; Field(Field(Var)*)* localFunctionTables = (Field(Field(Var)*)*)(moduleMemoryPtr + moduleMemory.mFuncPtrOffset) ; + #ifdef ENABLE_SIMDJS AsmJsSIMDValue* localSimdSlots = nullptr; @@ -2994,7 +3007,7 @@ namespace Js int64* int64Arg = m_localInt64Slots + info->GetTypedSlotInfo(WAsmJs::INT64)->constCount; double* doubleArg = m_localDoubleSlots + info->GetTypedSlotInfo(WAsmJs::FLOAT64)->constCount; float* floatArg = m_localFloatSlots + info->GetTypedSlotInfo(WAsmJs::FLOAT32)->constCount; -#if _M_X64 +#if defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) AsmJsSIMDValue* simdArg = m_localSimdSlots + info->GetTypedSlotInfo(WAsmJs::SIMD)->constCount; #endif // Move the arguments to the right location @@ -3099,13 +3112,24 @@ namespace Js } else { +#if defined (ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) Assert(info->GetArgType(i).isSIMD()); *simdArg = *(AsmJsSIMDValue*)floatSpillAddress; ++simdArg; homingAreaSize += sizeof(AsmJsSIMDValue); +#else + Assert(UNREACHED); +#endif } #ifdef ENABLE_SIMDJS if (scriptContext->GetConfig()->IsSimdjsEnabled() && i == 2) // last argument ? 
+#endif + +#ifdef ENABLE_WASM_SIMD + if (CONFIG_FLAG(WasmSimd) && i == 2) // last argument ? +#endif + +#if defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) { // If we have simd arguments, the homing area in m_inParams can be larger than 3 64-bit slots. This is because SIMD values are unboxed there too. // After unboxing, the homing area is overwritten by rdx, r8 and r9, and we read/skip 64-bit slots from the homing area (argAddress += MachPtr). @@ -3171,6 +3195,13 @@ namespace Js } #ifdef ENABLE_SIMDJS else if (scriptContext->GetConfig()->IsSimdjsEnabled() && info->GetArgType(i).isSIMD()) +#endif + +#ifdef ENABLE_WASM_SIMD + else if (CONFIG_FLAG(WasmSimd) && info->GetArgType(i).isSIMD()) +#endif + +#if defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) { *simdArg = *(AsmJsSIMDValue*)argAddress; ++simdArg; @@ -3770,7 +3801,7 @@ namespace Js case AsmJsRetType::Float: m_localFloatSlots[returnReg] = JavascriptFunction::CallAsmJsFunction(function, entrypointInfo->jsMethod, m_outParams, alignedArgsSize, reg); break; -#ifdef ENABLE_SIMDJS +#if defined (ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD) case AsmJsRetType::Float32x4: case AsmJsRetType::Int32x4: case AsmJsRetType::Bool32x4: @@ -3782,12 +3813,14 @@ namespace Js case AsmJsRetType::Uint32x4: case AsmJsRetType::Uint16x8: case AsmJsRetType::Uint8x16: +#if _WIN32 //WASM.SIMD ToDo: Enable thunk for Xplat #if _M_X64 X86SIMDValue simdVal; simdVal.m128_value = JavascriptFunction::CallAsmJsFunction<__m128>(function, entrypointInfo->jsMethod, m_outParams, alignedArgsSize, reg); m_localSimdSlots[returnReg] = X86SIMDValue::ToSIMDValue(simdVal); #else m_localSimdSlots[returnReg] = JavascriptFunction::CallAsmJsFunction(function, entrypointInfo->jsMethod, m_outParams, alignedArgsSize, reg); +#endif #endif break; #endif @@ -8006,12 +8039,18 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId) void InterpreterStackFrame::OP_SimdLdArrGeneric(const unaligned T* playout) { Assert(playout->ViewType < 
Js::ArrayBufferView::TYPE_COUNT); - const uint64 index = (uint32)GetRegRawInt(playout->SlotIndex) & ArrayBufferView::ViewMask[playout->ViewType]; - JavascriptArrayBuffer* arr = GetAsmJsBuffer(); + const uint64 index = ((uint64)(uint32)GetRegRawInt(playout->SlotIndex) + playout->Offset /* WASM only */) & (int64)(int)ArrayBufferView::ViewMask[playout->ViewType]; + + JavascriptArrayBuffer* arr = +#ifdef ENABLE_WASM_SIMD + (m_functionBody->IsWasmFunction()) ? + m_wasmMemory->GetBuffer() : +#endif + GetAsmJsBuffer(); + BYTE* buffer = arr->GetBuffer(); uint8 dataWidth = playout->DataWidth; RegSlot dstReg = playout->Value; - if (index + dataWidth > arr->GetByteLength()) { JavascriptError::ThrowRangeError(scriptContext, JSERR_ArgumentOutOfRange, _u("Simd typed array access")); @@ -8048,8 +8087,15 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId) void InterpreterStackFrame::OP_SimdStArrGeneric(const unaligned T* playout) { Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT); - const uint64 index = (uint32)GetRegRawInt(playout->SlotIndex) & ArrayBufferView::ViewMask[playout->ViewType]; - JavascriptArrayBuffer* arr = GetAsmJsBuffer(); + const uint64 index = ((uint64)(uint32)GetRegRawInt(playout->SlotIndex) + playout->Offset /* WASM only */) & (int64)(int)ArrayBufferView::ViewMask[playout->ViewType]; + + JavascriptArrayBuffer* arr = +#ifdef ENABLE_WASM_SIMD + (m_functionBody->IsWasmFunction()) ? 
+ m_wasmMemory->GetBuffer() : +#endif + GetAsmJsBuffer(); + BYTE* buffer = arr->GetBuffer(); uint8 dataWidth = playout->DataWidth; RegSlot srcReg = playout->Value; @@ -8083,13 +8129,33 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId) } + + bool InterpreterStackFrame::SIMDAnyNaN(AsmJsSIMDValue& input) + { + if (!GetFunctionBody()->IsWasmFunction()) + { + return false; + } + + AsmJsSIMDValue compResult = SIMDFloat32x4Operation::OpEqual(input, input); + return !SIMDBool32x4Operation::OpAllTrue(compResult); + } + // handler for SIMD.Int32x4.FromFloat32x4 template void InterpreterStackFrame::OP_SimdInt32x4FromFloat32x4(const unaligned T* playout) { bool throws = false; AsmJsSIMDValue input = GetRegRawSimd(playout->F4_1); - AsmJsSIMDValue result = SIMDInt32x4Operation::OpFromFloat32x4(input, throws); + AsmJsSIMDValue result{ 0 }; + +#ifdef ENABLE_WASM_SIMD + throws = SIMDAnyNaN(input); + if (!throws) +#endif + { + result = SIMDInt32x4Operation::OpFromFloat32x4(input, throws); + } // value is out of bound if (throws) @@ -8104,9 +8170,16 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId) { bool throws = false; AsmJsSIMDValue input = GetRegRawSimd(playout->F4_1); - AsmJsSIMDValue result = SIMDUint32x4Operation::OpFromFloat32x4(input, throws); + AsmJsSIMDValue result{ 0 }; + +#ifdef ENABLE_WASM_SIMD + throws = SIMDAnyNaN(input); + if (!throws) +#endif + { + result = SIMDUint32x4Operation::OpFromFloat32x4(input, throws); + } - // value is out of bound if (throws) { JavascriptError::ThrowRangeError(scriptContext, JSERR_ArgumentOutOfRange, _u("SIMD.Int32x4.FromFloat32x4")); @@ -8114,6 +8187,25 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId) SetRegRawSimd(playout->U4_0, result); } + template + void InterpreterStackFrame::OP_WasmSimdConst(const unaligned T* playout) + { + AsmJsSIMDValue result{ playout->C1, playout->C2, playout->C3, playout->C4 }; + SetRegRawSimd(playout->F4_0, result); + } + + 
template + void InterpreterStackFrame::OP_SimdShuffleV8X16(const unaligned T* playout) + { + uint32 lanes[Wasm::Simd::MAX_LANES]; + for (uint32 i = 0; i < Wasm::Simd::MAX_LANES; i++) + { + Assert(playout->INDICES[i] < Wasm::Simd::MAX_LANES * 2); + lanes[i] = playout->INDICES[i]; + } + SetRegRawSimd(playout->R0, SIMDUtils::SIMD128InnerShuffle(GetRegRawSimd(playout->R1), GetRegRawSimd(playout->R2), Wasm::Simd::MAX_LANES, lanes)); + } + template void InterpreterStackFrame::OP_SimdInt16x8(const unaligned T* playout) { diff --git a/lib/Runtime/Language/InterpreterStackFrame.h b/lib/Runtime/Language/InterpreterStackFrame.h index 2ce4346a38f..d26bcc8f5bf 100644 --- a/lib/Runtime/Language/InterpreterStackFrame.h +++ b/lib/Runtime/Language/InterpreterStackFrame.h @@ -211,7 +211,7 @@ namespace Js void OP_I_SetOutAsmFlt(RegSlot outRegisterID, float val); void OP_I_SetOutAsmLong(RegSlot outRegisterID, int64 val); void OP_I_SetOutAsmSimd(RegSlot outRegisterID, AsmJsSIMDValue val); - template + template //type is int to avoid including Wasm headers void OP_InvalidWasmTypeConversion(...); void OP_WasmPrintFunc(int index); template void OP_WasmPrintFunc(const unaligned T* playout) { OP_WasmPrintFunc((int)playout->I1); } @@ -257,12 +257,16 @@ namespace Js template AsmJsSIMDValue GetRegRawSimd(RegSlotType localRegisterID) const; template void SetRegRawSimd(RegSlotType localRegisterID, AsmJsSIMDValue bValue); + template void OP_SimdLdArrGeneric(const unaligned T* playout); template void OP_SimdLdArrConstIndex(const unaligned T* playout); template void OP_SimdStArrGeneric(const unaligned T* playout); template void OP_SimdStArrConstIndex(const unaligned T* playout); + bool SIMDAnyNaN(AsmJsSIMDValue& input); template void OP_SimdInt32x4FromFloat32x4(const unaligned T* playout); template void OP_SimdUint32x4FromFloat32x4(const unaligned T* playout); + template void OP_WasmSimdConst(const unaligned T* playout); + template void OP_SimdShuffleV8X16(const unaligned T* playout); template void 
OP_SimdInt16x8(const unaligned T* playout); template void OP_SimdInt8x16(const unaligned T* playout); diff --git a/lib/Runtime/Language/SimdBool32x4Operation.cpp b/lib/Runtime/Language/SimdBool32x4Operation.cpp index 5eeac0dbe62..40ebc13f038 100644 --- a/lib/Runtime/Language/SimdBool32x4Operation.cpp +++ b/lib/Runtime/Language/SimdBool32x4Operation.cpp @@ -28,14 +28,26 @@ namespace Js } // Unary Ops - bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& simd) + template + bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& val) { + SIMDValue simd = SIMDUtils::CanonicalizeToBools(val); //copy-by-value since we need to modify the copy return simd.i32[SIMD_X] || simd.i32[SIMD_Y] || simd.i32[SIMD_Z] || simd.i32[SIMD_W]; } - bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& simd) + template + bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& val) { + SIMDValue simd = SIMDUtils::CanonicalizeToBools(val); //copy-by-value since we need to modify the copy return simd.i32[SIMD_X] && simd.i32[SIMD_Y] && simd.i32[SIMD_Z] && simd.i32[SIMD_W]; } + + template bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& val); + template bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& val); + template bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& val); + // + template bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& val); + template bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& val); + template bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& val); } #endif diff --git a/lib/Runtime/Language/SimdBool32x4Operation.h b/lib/Runtime/Language/SimdBool32x4Operation.h index 222e8bc5d67..643ff6493cc 100644 --- a/lib/Runtime/Language/SimdBool32x4Operation.h +++ b/lib/Runtime/Language/SimdBool32x4Operation.h @@ -14,7 +14,9 @@ namespace Js { // Unary Ops static SIMDValue OpNot(const SIMDValue& v); + template static bool OpAnyTrue(const SIMDValue& v); + template static bool OpAllTrue(const SIMDValue& v); // Binary Ops diff --git 
a/lib/Runtime/Language/SimdBool32x4OperationX86X64.cpp b/lib/Runtime/Language/SimdBool32x4OperationX86X64.cpp index 953a3fb7901..a449c105e22 100644 --- a/lib/Runtime/Language/SimdBool32x4OperationX86X64.cpp +++ b/lib/Runtime/Language/SimdBool32x4OperationX86X64.cpp @@ -24,19 +24,33 @@ namespace Js } // Unary Ops + template bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& simd) { - X86SIMDValue x86Simd = X86SIMDValue::ToX86SIMDValue(simd); + SIMDValue canonSimd = SIMDUtils::CanonicalizeToBools(simd); //copy-by-value since we need to modify the copy + X86SIMDValue x86Simd = X86SIMDValue::ToX86SIMDValue(canonSimd); int mask_8 = _mm_movemask_epi8(x86Simd.m128i_value); //latency 3, throughput 1 return mask_8 != 0; } + template bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& simd) { - X86SIMDValue x86Simd = X86SIMDValue::ToX86SIMDValue(simd); + SIMDValue canonSimd = SIMDUtils::CanonicalizeToBools(simd); //copy-by-value since we need to modify the copy + X86SIMDValue x86Simd = X86SIMDValue::ToX86SIMDValue(canonSimd); int mask_8 = _mm_movemask_epi8(x86Simd.m128i_value); //latency 3, throughput 1 return mask_8 == 0xFFFF; } + + template bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& simd); + template bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& simd); + template bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& simd); + template bool SIMDBool32x4Operation::OpAllTrue(const SIMDValue& simd); + // + template bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& simd); + template bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& simd); + template bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& simd); + template bool SIMDBool32x4Operation::OpAnyTrue(const SIMDValue& simd); } diff --git a/lib/Runtime/Language/SimdFloat64x2Operation.h b/lib/Runtime/Language/SimdFloat64x2Operation.h index c6a315ae8d4..de32f63ed64 100644 --- a/lib/Runtime/Language/SimdFloat64x2Operation.h +++ b/lib/Runtime/Language/SimdFloat64x2Operation.h @@ -43,6 
+43,7 @@ namespace Js { static SIMDValue OpGreaterThan(const SIMDValue& aValue, const SIMDValue& bValue); static SIMDValue OpGreaterThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue); static SIMDValue OpSelect(const SIMDValue& mV, const SIMDValue& tV, const SIMDValue& fV); + template static void OpConv(SIMDValue* dst, SIMDValue* src); }; } // namespace Js diff --git a/lib/Runtime/Language/SimdFloat64x2OperationX86X64.cpp b/lib/Runtime/Language/SimdFloat64x2OperationX86X64.cpp index 25f6c48101d..79808eac9c3 100644 --- a/lib/Runtime/Language/SimdFloat64x2OperationX86X64.cpp +++ b/lib/Runtime/Language/SimdFloat64x2OperationX86X64.cpp @@ -318,6 +318,16 @@ namespace Js return X86SIMDValue::ToSIMDValue(x86Result); } + template + void SIMDFloat64x2Operation::OpConv(SIMDValue* dst, SIMDValue* src) + { + dst->f64[0] = (double) (T)src->i64[0]; + dst->f64[1] = (double) (T)src->i64[1]; + } + + template void SIMDFloat64x2Operation::OpConv(SIMDValue* dst, SIMDValue* src); + template void SIMDFloat64x2Operation::OpConv(SIMDValue* dst, SIMDValue* src); + } #endif diff --git a/lib/Runtime/Language/SimdInt64x2Operation.cpp b/lib/Runtime/Language/SimdInt64x2Operation.cpp new file mode 100755 index 00000000000..0175291dcb8 --- /dev/null +++ b/lib/Runtime/Language/SimdInt64x2Operation.cpp @@ -0,0 +1,119 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//------------------------------------------------------------------------------------------------------- + +#include "RuntimeLanguagePch.h" + +namespace Js +{ + SIMDValue SIMDInt64x2Operation::OpSplat(int64 val) + { + SIMDValue result; + result.i64[0] = val; + result.i64[1] = val; + return result; + } + + SIMDValue SIMDInt64x2Operation::OpAdd(const SIMDValue& a, const SIMDValue& b) + { + SIMDValue result; + result.i64[0] = a.i64[0] + b.i64[0]; + result.i64[1] = a.i64[1] + b.i64[1]; + return result; + } + + SIMDValue SIMDInt64x2Operation::OpSub(const SIMDValue& a, const SIMDValue& b) + { + SIMDValue result; + result.i64[0] = a.i64[0] - b.i64[0]; + result.i64[1] = a.i64[1] - b.i64[1]; + return result; + } + + SIMDValue SIMDInt64x2Operation::OpNeg(const SIMDValue& a) + { + SIMDValue result; + result.i64[0] = -a.i64[0]; + result.i64[1] = -a.i64[1]; + return result; + } + + static bool IsInRange(double val, uint64& out) + { + if (val != val || val <= (double)0) + { + out = 0; + return false; + } + + if (val >= (double)ULLONG_MAX) + { + out = ULLONG_MAX; + return false; + } + + return true; + } + + static bool IsInRange(double val, int64& out) + { + if (val != val) + { + out = 0; + return false; + } + + if (val <= (double)LLONG_MIN) + { + out = LLONG_MIN; + return false; + } + + if (val >= (double)LLONG_MAX) + { + out = LLONG_MAX; + return false; + } + + return true; + } + + template + void SIMDInt64x2Operation::OpTrunc(SIMDValue* dst, SIMDValue* src) + { + T convertedVal; + dst->i64[0] = IsInRange(src->f64[0], convertedVal) ? (T)src->f64[0] : convertedVal; + dst->i64[1] = IsInRange(src->f64[1], convertedVal) ? 
(T)src->f64[1] : convertedVal; + } + + template void SIMDInt64x2Operation::OpTrunc(SIMDValue* dst, SIMDValue* src); + template void SIMDInt64x2Operation::OpTrunc(SIMDValue* dst, SIMDValue* src); + + void SIMDInt64x2Operation::OpShiftLeftByScalar(SIMDValue* dst, SIMDValue* src, int count) + { + count = count & SIMDUtils::SIMDGetShiftAmountMask(8); + dst->i64[0] = src->i64[0] << count; + dst->i64[1] = src->i64[1] << count; + } + + void SIMDInt64x2Operation::OpShiftRightByScalar(SIMDValue* dst, SIMDValue* src, int count) + { + count = count & SIMDUtils::SIMDGetShiftAmountMask(8); + dst->i64[0] = src->i64[0] >> count; + dst->i64[1] = src->i64[1] >> count; + } + + void SIMDInt64x2Operation::OpShiftRightByScalarU(SIMDValue* dst, SIMDValue* src, int count) + { + count = count & SIMDUtils::SIMDGetShiftAmountMask(8); + dst->i64[0] = (uint64)src->i64[0] >> count; + dst->i64[1] = (uint64)src->i64[1] >> count; + } + + void SIMDInt64x2Operation::OpReplaceLane(SIMDValue* dst, SIMDValue* src, int64 val, uint index) + { + dst->SetValue(*src); + dst->i64[index] = val; + } +} diff --git a/lib/Runtime/Language/SimdInt64x2Operation.h b/lib/Runtime/Language/SimdInt64x2Operation.h new file mode 100755 index 00000000000..3f9b08de0cb --- /dev/null +++ b/lib/Runtime/Language/SimdInt64x2Operation.h @@ -0,0 +1,26 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//------------------------------------------------------------------------------------------------------- +#pragma once + +namespace Js { + + struct SIMDInt64x2Operation + { + static SIMDValue OpSplat(int64 x); + static SIMDValue OpAdd(const SIMDValue& a, const SIMDValue& b); + static SIMDValue OpSub(const SIMDValue& a, const SIMDValue& b); + static SIMDValue OpNeg(const SIMDValue& a); + + //These are directly called into from JIT as of now. Pointer-based arguments are used to ensure + //that calling conventions are consistent across x86/x64 and match call sequences JIT generates. + //TODO: Change back to "const SIMDValue& a" after getting rid of the jit helpers. + template static void OpTrunc(SIMDValue* dst, SIMDValue* src); + static void OpShiftLeftByScalar(SIMDValue* dst, SIMDValue* src, int count); + static void OpShiftRightByScalar(SIMDValue* dst, SIMDValue* src, int count); + static void OpShiftRightByScalarU(SIMDValue* dst, SIMDValue* src, int count); + static void OpReplaceLane(SIMDValue* dst, SIMDValue* src, int64 val, uint index); + }; + +} // namespace Js diff --git a/lib/Runtime/Language/SimdOps.h b/lib/Runtime/Language/SimdOps.h index f5a3deff7dc..7e08fd37c17 100644 --- a/lib/Runtime/Language/SimdOps.h +++ b/lib/Runtime/Language/SimdOps.h @@ -16,3 +16,4 @@ #include "Language/SimdBool32x4Operation.h" #include "Language/SimdBool16x8Operation.h" #include "Language/SimdBool8x16Operation.h" +#include "Language/SimdInt64x2Operation.h" diff --git a/lib/Runtime/Language/SimdUtils.h b/lib/Runtime/Language/SimdUtils.h index dc991d231a1..5ef8df84004 100644 --- a/lib/Runtime/Language/SimdUtils.h +++ b/lib/Runtime/Language/SimdUtils.h @@ -28,7 +28,8 @@ Field(uint16) u16[8];\ Field(uint8) u8[16];\ Field(float) f32[4];\ - Field(double) f64[2]; + Field(double) f64[2]; \ + Field(int64) i64[2]; #define SIMD_TEMP_SIZE 3 struct _SIMDValue { @@ -146,7 +147,7 @@ const _x86_SIMDValue X86_4LANES_MASKS[] = {{ 0xffffffff, 0x00000000, 0x00000 #pragma warning(pop) -#if 
ENABLE_NATIVE_CODEGEN && defined(ENABLE_SIMDJS) +#if ENABLE_NATIVE_CODEGEN && (defined(ENABLE_SIMDJS) || defined(ENABLE_WASM_SIMD)) // auxiliary SIMD values in memory to help JIT'ed code. E.g. used for Int8x16 shuffle. extern _x86_SIMDValue X86_TEMP_SIMD[]; #endif @@ -225,29 +226,79 @@ namespace Js { //////////////////////////////////////////// static inline SIMDValue SIMD128InnerReplaceLaneF4(SIMDValue simdVal, const uint32 lane, const float value) { + Assert(lane < 4); simdVal.f32[lane] = value; return simdVal; }; + static inline SIMDValue SIMD128InnerReplaceLaneD2(SIMDValue simdVal, const uint32 lane, const double value) + { + Assert(lane < 2); + simdVal.f64[lane] = value; + return simdVal; + }; + + static inline SIMDValue SIMD128InnerReplaceLaneI2(SIMDValue simdVal, const uint32 lane, const int64 value) + { + Assert(lane < 2); + simdVal.i64[lane] = value; + return simdVal; + }; static inline SIMDValue SIMD128InnerReplaceLaneI4(SIMDValue simdVal, const uint32 lane, const int32 value) { + Assert(lane < 4); simdVal.i32[lane] = value; return simdVal; }; static inline SIMDValue SIMD128InnerReplaceLaneI8(SIMDValue simdVal, const uint32 lane, const int16 value) { + Assert(lane < 8); simdVal.i16[lane] = value; return simdVal; }; static inline SIMDValue SIMD128InnerReplaceLaneI16(SIMDValue simdVal, const uint32 lane, const int8 value) { + Assert(lane < 16); simdVal.i8[lane] = value; return simdVal; }; - static inline float SIMD128InnerExtractLaneF4(const SIMDValue src1, const uint32 lane) { return src1.f32[lane]; }; - static inline int32 SIMD128InnerExtractLaneI4(const SIMDValue src1, const uint32 lane) { return src1.i32[lane]; }; - static inline int16 SIMD128InnerExtractLaneI8(const SIMDValue src1, const uint32 lane) { return src1.i16[lane]; }; - static inline int8 SIMD128InnerExtractLaneI16(const SIMDValue src1, const uint32 lane) { return src1.i8[lane]; }; + static inline int32 SIMD128InnerExtractLaneB4(const SIMDValue src1, const uint32 lane) + { + Assert(lane < 4); 
+ int val = SIMD128InnerExtractLaneI4(src1, lane); + return val ? 1 : 0; + }; + + static inline int16 SIMD128InnerExtractLaneB8(const SIMDValue src1, const uint32 lane) + { + Assert(lane < 8); + int16 val = SIMD128InnerExtractLaneI8(src1, lane); + return val ? 1 : 0; + }; + + static inline int8 SIMD128InnerExtractLaneB16(const SIMDValue src1, const uint32 lane) + { + Assert(lane < 16); + int8 val = SIMD128InnerExtractLaneI16(src1, lane); + return val ? 1 : 0; + }; + + static inline double SIMD128InnerExtractLaneD2(const SIMDValue src1, const uint32 lane) { Assert(lane < 2); return src1.f64[lane]; }; + static inline float SIMD128InnerExtractLaneF4(const SIMDValue src1, const uint32 lane) { Assert(lane < 4); return src1.f32[lane]; }; + static inline int64 SIMD128InnerExtractLaneI2(const SIMDValue src1, const uint32 lane) { Assert(lane < 2); return src1.i64[lane]; }; + static inline int32 SIMD128InnerExtractLaneI4(const SIMDValue src1, const uint32 lane) { Assert(lane < 4); return src1.i32[lane]; }; + static inline int16 SIMD128InnerExtractLaneI8(const SIMDValue src1, const uint32 lane) { Assert(lane < 8); return src1.i16[lane]; }; + static inline int8 SIMD128InnerExtractLaneI16(const SIMDValue src1, const uint32 lane) { Assert(lane < 16); return src1.i8[lane]; }; + + static inline SIMDValue SIMD128BitSelect(const SIMDValue src1, const SIMDValue src2, const SIMDValue mask) + { + SIMDValue res{ 0 }; + res.i32[0] = (src1.i32[0] & mask.i32[0]) | (src2.i32[0] & ~mask.i32[0]); + res.i32[1] = (src1.i32[1] & mask.i32[1]) | (src2.i32[1] & ~mask.i32[1]); + res.i32[2] = (src1.i32[2] & mask.i32[2]) | (src2.i32[2] & ~mask.i32[2]); + res.i32[3] = (src1.i32[3] & mask.i32[3]) | (src2.i32[3] & ~mask.i32[3]); + return res; + } template static inline T SIMD128ExtractLane(const Var src, const Var lane, ScriptContext* scriptContext) @@ -311,6 +362,27 @@ namespace Js { return SIMDType::New(&result, scriptContext); } + template + static SIMDValue CanonicalizeToBools(SIMDValue val) + { 
+#ifdef ENABLE_WASM_SIMD + + CompileAssert(sizeof(T) <= sizeof(SIMDValue)); + CompileAssert(sizeof(SIMDValue) % sizeof(T) == 0); + T* cursor = (T*)val.i8; + const uint maxBytes = 16; + uint size = maxBytes / sizeof(T); + + for (uint i = 0; i < size; i++) + { + cursor[i] = cursor[i] ? (T) -1 : 0; + } + return val; +#else + return val; +#endif + } + template static void SIMD128TypedArrayStore(Var arg1, Var arg2, Var simdVar, uint32 dataWidth, ScriptContext *scriptContext) { diff --git a/lib/Runtime/Language/ValueType.cpp b/lib/Runtime/Language/ValueType.cpp index 4895e6529b1..b7c0946cdfd 100644 --- a/lib/Runtime/Language/ValueType.cpp +++ b/lib/Runtime/Language/ValueType.cpp @@ -755,6 +755,8 @@ bool ValueType::IsSimd128(IRType type) const return IsSimd128Uint8x16(); case TySimd128D2: return IsSimd128Float64x2(); + case TySimd128I2: + return IsSimd128Int64x2(); default: Assert(UNREACHED); return false; @@ -801,6 +803,12 @@ bool ValueType::IsSimd128Float64x2() const return IsObject() && GetObjectType() == ObjectType::Simd128Float64x2; } +bool ValueType::IsSimd128Int64x2() const +{ + return IsObject() && GetObjectType() == ObjectType::Simd128Int64x2; +} + + bool ValueType::IsLikelySimd128() const { return IsLikelyObject() && (GetObjectType() >= ObjectType::Simd128Float32x4 && GetObjectType() <= ObjectType::Simd128Float64x2); @@ -840,6 +848,11 @@ bool ValueType::IsLikelySimd128Float64x2() const { return IsLikelyObject() && GetObjectType() == ObjectType::Simd128Float64x2; } + +bool ValueType::IsLikelySimd128Int64x2() const +{ + return IsLikelyObject() && GetObjectType() == ObjectType::Simd128Int64x2; +} #endif ObjectType ValueType::GetObjectType() const diff --git a/lib/Runtime/Language/ValueType.h b/lib/Runtime/Language/ValueType.h index f789653575c..cd27a0add09 100644 --- a/lib/Runtime/Language/ValueType.h +++ b/lib/Runtime/Language/ValueType.h @@ -179,6 +179,7 @@ class ValueType bool IsSimd128Uint16x8() const; bool IsSimd128Uint8x16() const; bool IsSimd128Float64x2() 
const; + bool IsSimd128Int64x2() const; bool IsLikelySimd128() const; @@ -189,6 +190,7 @@ class ValueType bool IsLikelySimd128Uint16x8() const; bool IsLikelySimd128Uint8x16() const; bool IsLikelySimd128Float64x2() const; + bool IsLikelySimd128Int64x2() const; #endif bool HasBeenObject() const; diff --git a/lib/Runtime/Language/ValueTypes.h b/lib/Runtime/Language/ValueTypes.h index af7619ef16b..39ba4648f3e 100644 --- a/lib/Runtime/Language/ValueTypes.h +++ b/lib/Runtime/Language/ValueTypes.h @@ -121,6 +121,7 @@ OBJECT_TYPE(Simd128Uint8x16 ) OBJECT_TYPE(Simd128Bool32x4 ) OBJECT_TYPE(Simd128Bool16x8 ) OBJECT_TYPE(Simd128Bool8x16 ) +OBJECT_TYPE(Simd128Int64x2 ) OBJECT_TYPE(Simd128Float64x2 ) // !! This is a marker for last SIMD type. Insert new SIMD types above. diff --git a/lib/Runtime/Library/SimdBool16x8Lib.cpp b/lib/Runtime/Library/SimdBool16x8Lib.cpp index 4ae88878728..0188c4f2480 100644 --- a/lib/Runtime/Library/SimdBool16x8Lib.cpp +++ b/lib/Runtime/Library/SimdBool16x8Lib.cpp @@ -131,7 +131,7 @@ namespace Js JavascriptSIMDBool16x8 *a = JavascriptSIMDBool16x8::FromVar(args[1]); Assert(a); - bool result = SIMDBool32x4Operation::OpAllTrue(a->GetValue()); + bool result = SIMDBool32x4Operation::OpAllTrue(a->GetValue()); return JavascriptBoolean::ToVar(result, scriptContext); } diff --git a/lib/Runtime/Library/SimdBool8x16Lib.cpp b/lib/Runtime/Library/SimdBool8x16Lib.cpp index c62d5493e8b..11ba2888b8e 100644 --- a/lib/Runtime/Library/SimdBool8x16Lib.cpp +++ b/lib/Runtime/Library/SimdBool8x16Lib.cpp @@ -131,7 +131,7 @@ namespace Js JavascriptSIMDBool8x16 *a = JavascriptSIMDBool8x16::FromVar(args[1]); Assert(a); - bool result = SIMDBool32x4Operation::OpAllTrue(a->GetValue()); + bool result = SIMDBool32x4Operation::OpAllTrue(a->GetValue()); return JavascriptBoolean::ToVar(result, scriptContext); } diff --git a/lib/Runtime/Library/amd64/JavascriptFunctionA.S b/lib/Runtime/Library/amd64/JavascriptFunctionA.S index 17edad70ced..48aaba133a6 100644 --- 
a/lib/Runtime/Library/amd64/JavascriptFunctionA.S +++ b/lib/Runtime/Library/amd64/JavascriptFunctionA.S @@ -184,6 +184,13 @@ LEAF_ENTRY _ZN2Js18JavascriptFunction17CallAsmJsFunctionIdEET_PNS_16RecyclableOb jmp C_FUNC(_ZN2Js18JavascriptFunction17CallAsmJsFunctionIiEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEPS5_jPh) LEAF_END _ZN2Js18JavascriptFunction17CallAsmJsFunctionIdEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEPS5_jPh, _TEXT +// AsmJsSIMDValue CallAsmJsFunction(RecyclableObject *function, JavascriptMethod entryPoint, uint argc, Var *argv); +.balign 16 +LEAF_ENTRY _ZN2Js18JavascriptFunction17CallAsmJsFunctionIDv4_fEET_PNS_16RecyclableObjectEPFPvS5_NS_8CallInfoEzEjPS6_, _TEXT + int 3 //TODO: Verify this code path when enabling WASM.SIMD for xplat + jmp C_FUNC(_ZN2Js18JavascriptFunction17CallAsmJsFunctionIiEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEPS5_jPh) +LEAF_END _ZN2Js18JavascriptFunction17CallAsmJsFunctionIDv4_fEET_PNS_16RecyclableObjectEPFPvS5_NS_8CallInfoEzEjPS6_, _TEXT + #endif // _ENABLE_DYNAMIC_THUNKS //------------------------------------------------------------------------------ .balign 16 diff --git a/lib/WasmReader/Chakra.WasmReader.vcxproj b/lib/WasmReader/Chakra.WasmReader.vcxproj index 2d2e6b0df09..b28f148e395 100644 --- a/lib/WasmReader/Chakra.WasmReader.vcxproj +++ b/lib/WasmReader/Chakra.WasmReader.vcxproj @@ -68,6 +68,7 @@ + diff --git a/lib/WasmReader/Chakra.WasmReader.vcxproj.filters b/lib/WasmReader/Chakra.WasmReader.vcxproj.filters index db751263fff..8b1e3471e5c 100644 --- a/lib/WasmReader/Chakra.WasmReader.vcxproj.filters +++ b/lib/WasmReader/Chakra.WasmReader.vcxproj.filters @@ -8,6 +8,7 @@ + diff --git a/lib/WasmReader/EmptyWasmByteCodeWriter.h b/lib/WasmReader/EmptyWasmByteCodeWriter.h index a096573bbbf..1414de459c7 100644 --- a/lib/WasmReader/EmptyWasmByteCodeWriter.h +++ b/lib/WasmReader/EmptyWasmByteCodeWriter.h @@ -27,7 +27,17 @@ namespace Js virtual void AsmReg1(OpCodeAsmJs op, RegSlot R0) override {} virtual void 
AsmReg2(OpCodeAsmJs op, RegSlot R0, RegSlot R1) override {} virtual void AsmReg3(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2) override {} + virtual void AsmReg4(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3) override {} + virtual void AsmReg5(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4) override {} + virtual void AsmReg9(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8) override {} + virtual void AsmReg17(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8, + RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16) override {} + virtual void AsmShuffle(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, uint8 indices[]) override {} + virtual void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0) override {}; + virtual void WasmSimdConst(OpCodeAsmJs op, RegSlot R0, int C0, int C1, int C2, int C3) override {}; + virtual void AsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, uint32 slotId) override {} + virtual void AsmBr(ByteCodeLabel labelID, OpCodeAsmJs op = OpCodeAsmJs::AsmBr) override {} virtual void AsmBrReg1(OpCodeAsmJs op, ByteCodeLabel labelID, RegSlot R1) override {} virtual void AsmBrReg1Const1(OpCodeAsmJs op, ByteCodeLabel labelID, RegSlot R1, int C1) override {} diff --git a/lib/WasmReader/WasmBinaryOpCodes.h b/lib/WasmReader/WasmBinaryOpCodes.h index 516d0f7f0db..1550d62d579 100755 --- a/lib/WasmReader/WasmBinaryOpCodes.h +++ b/lib/WasmReader/WasmBinaryOpCodes.h @@ -150,7 +150,6 @@ WASM_MISC_OPCODE(I64Const, 0x42, Limit, false) WASM_MISC_OPCODE(F32Const, 0x43, Limit, false) WASM_MISC_OPCODE(F64Const, 0x44, Limit, false) -//////////////////////////////////////////////////////////// // Comparison operators 
WASM_UNARY__OPCODE(I32Eqz, 0x45, I_I , Eqz_Int , false) WASM_BINARY_OPCODE(I32Eq, 0x46, I_II, CmEq_Int , false) @@ -311,11 +310,21 @@ WASM_EMPTY__OPCODE(PrintBeginCall , 0xf2, PrintBeginCall , false) WASM_EMPTY__OPCODE(PrintNewLine , 0xf3, PrintNewLine , false) WASM_UNARY__OPCODE(PrintEndCall , 0xf4, V_I , PrintEndCall , false) WASM_UNARY__OPCODE(PrintI32 , 0xfc, I_I , PrintI32 , false) -WASM_UNARY__OPCODE(PrintI64 , 0xfd, L_L , PrintI64 , false) +WASM_UNARY__OPCODE(PrintI64 , 0xef, L_L , PrintI64 , false) WASM_UNARY__OPCODE(PrintF32 , 0xfe, F_F , PrintF32 , false) WASM_UNARY__OPCODE(PrintF64 , 0xff, D_D , PrintF64 , false) #endif +//Extended +WASM_MISC_OPCODE(SimdStart, 0xfd, Limit, false) +WASM_MISC_OPCODE(Extended, 0x06, Limit, false) +WASM_MISC_OPCODE(Extended2, 0x07, Limit, false) + +//Simd +#ifdef ENABLE_WASM_SIMD +#include "WasmBinaryOpcodesSimd.h" +#endif + #undef WASM_OPCODE #undef WASM_SIGNATURE #undef WASM_CTRL_OPCODE diff --git a/lib/WasmReader/WasmBinaryOpcodesSimd.h b/lib/WasmReader/WasmBinaryOpcodesSimd.h new file mode 100644 index 00000000000..55d1e53a35c --- /dev/null +++ b/lib/WasmReader/WasmBinaryOpcodesSimd.h @@ -0,0 +1,189 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//------------------------------------------------------------------------------------------------------- + +#ifndef WASM_LANE_OPCODE +#define WASM_LANE_OPCODE(opname, opcode, sig, nyi) WASM_OPCODE(opname, opcode, sig, nyi) +#endif + +#ifndef WASM_EXTRACTLANE_OPCODE +#define WASM_EXTRACTLANE_OPCODE(opname, opcode, sig, asmjsop, nyi) WASM_LANE_OPCODE(opname, opcode, sig, nyi) +#endif + +#ifndef WASM_REPLACELANE_OPCODE +#define WASM_REPLACELANE_OPCODE(opname, opcode, sig, asmjsop, nyi) WASM_LANE_OPCODE(opname, opcode, sig, nyi) +#endif + +#ifndef WASM_SIMD_BUILD_OPCODE +#define WASM_SIMD_BUILD_OPCODE(opname, opcode, sig, asmjop, lanes, nyi) WASM_OPCODE(opname, opcode, sig, nyi) +#endif + +#ifndef WASM_SIMD_MEMREAD_OPCODE +#define WASM_SIMD_MEMREAD_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, nyi) WASM_MEM_OPCODE(opname, opcode, sig, nyi) +#endif + +#ifndef WASM_SIMD_MEMSTORE_OPCODE +#define WASM_SIMD_MEMSTORE_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, nyi) WASM_MEM_OPCODE(opname, opcode, sig, nyi) +#endif + +//SIMD Signatures +WASM_SIGNATURE(F_M128, 2, WasmTypes::F32, WasmTypes::M128) +WASM_SIGNATURE(I_M128, 2, WasmTypes::I32, WasmTypes::M128) +WASM_SIGNATURE(M128_I, 2, WasmTypes::M128, WasmTypes::I32) +WASM_SIGNATURE(M128_F, 2, WasmTypes::M128, WasmTypes::F32) +WASM_SIGNATURE(M128_L, 2, WasmTypes::M128, WasmTypes::I64) +WASM_SIGNATURE(M128_D, 2, WasmTypes::M128, WasmTypes::F64) +WASM_SIGNATURE(M128X3, 3, WasmTypes::M128, WasmTypes::M128, WasmTypes::M128) +WASM_SIGNATURE(M128_M128_I, 3, WasmTypes::M128, WasmTypes::M128, WasmTypes::I32) +WASM_SIGNATURE(M128_M128, 2, WasmTypes::M128, WasmTypes::M128) +WASM_SIGNATURE(L_M128, 2, WasmTypes::I64, WasmTypes::M128) +WASM_SIGNATURE(D_M128_I, 3, WasmTypes::F64, WasmTypes::M128, WasmTypes::I32) + +WASM_MISC_OPCODE(M128Const, 0x100, Limit, false) +WASM_SIMD_MEMREAD_OPCODE(M128Load, 0x101, M128_I, Simd128_LdArr_F4, Js::ArrayBufferView::TYPE_FLOAT32, 16, false) +WASM_SIMD_MEMSTORE_OPCODE(M128Store, 
0x102, M128_I, Simd128_StArr_F4, Js::ArrayBufferView::TYPE_FLOAT32, 16, false) +WASM_UNARY__OPCODE(I16Splat, 0x103, M128_I, Simd128_Splat_I16, false) +WASM_UNARY__OPCODE(I8Splat, 0x104, M128_I, Simd128_Splat_I8, false) +WASM_UNARY__OPCODE(I4Splat, 0x105, M128_I, Simd128_Splat_I4, false) +WASM_UNARY__OPCODE(I2Splat, 0x106, M128_L, Simd128_Splat_I2, false) +WASM_UNARY__OPCODE(F4Splat, 0x107, M128_F, Simd128_Splat_F4, false) +WASM_UNARY__OPCODE(F2Splat, 0x108, M128_D, Simd128_Splat_D2, false) +WASM_EXTRACTLANE_OPCODE(I16ExtractLaneS, 0x109, I_M128, Simd128_ExtractLane_I16, false) +WASM_EXTRACTLANE_OPCODE(I16ExtractLaneU, 0x10a, I_M128, Simd128_ExtractLane_U16, false) +WASM_EXTRACTLANE_OPCODE(I8ExtractLaneS, 0x10b, I_M128, Simd128_ExtractLane_I8, false) +WASM_EXTRACTLANE_OPCODE(I8ExtractLaneU, 0x10c, I_M128, Simd128_ExtractLane_U8, false) +WASM_EXTRACTLANE_OPCODE(I4ExtractLane, 0x10d, I_M128, Simd128_ExtractLane_I4, false) +WASM_EXTRACTLANE_OPCODE(I2ExtractLane, 0x10e, L_M128, Simd128_ExtractLane_I2, false) +WASM_EXTRACTLANE_OPCODE(F4ExtractLane, 0x10f, I_M128, Simd128_ExtractLane_F4, false) +WASM_BINARY_OPCODE(F2ExtractLane, 0x110, D_M128_I, Simd128_ExtractLane_D2, false) +WASM_REPLACELANE_OPCODE(I16ReplaceLane, 0x111, M128_I, Simd128_ReplaceLane_I16, false) +WASM_REPLACELANE_OPCODE(I8ReplaceLane, 0x112, M128_I, Simd128_ReplaceLane_I8, false) +WASM_REPLACELANE_OPCODE(I4ReplaceLane, 0x113, M128_I, Simd128_ReplaceLane_I4, false) +WASM_REPLACELANE_OPCODE(I2ReplaceLane, 0x114, M128_L, Simd128_ReplaceLane_I2, false) +WASM_REPLACELANE_OPCODE(F4ReplaceLane, 0x115, M128_F, Simd128_ReplaceLane_F4, false) +WASM_REPLACELANE_OPCODE(F2ReplaceLane, 0x116, M128_D, Simd128_ReplaceLane_D2, false) +WASM_MISC_OPCODE(V8X16Shuffle, 0x117, Limit, false) +WASM_BINARY_OPCODE(I16Add, 0x118, M128X3, Simd128_Add_I16, false) +WASM_BINARY_OPCODE(I8Add, 0x119, M128X3, Simd128_Add_I8, false) +WASM_BINARY_OPCODE(I4Add, 0x11a, M128X3, Simd128_Add_I4, false) +WASM_BINARY_OPCODE(I2Add, 0x11b, M128X3, 
Simd128_Add_I2, false) +WASM_BINARY_OPCODE(I16Sub, 0x11c, M128X3, Simd128_Sub_I16, false) +WASM_BINARY_OPCODE(I8Sub, 0x11d, M128X3, Simd128_Sub_I8, false) +WASM_BINARY_OPCODE(I4Sub, 0x11e, M128X3, Simd128_Sub_I4, false) +WASM_BINARY_OPCODE(I2Sub, 0x11f, M128X3, Simd128_Sub_I2, false) +WASM_BINARY_OPCODE(I16Mul, 0x120, M128X3, Simd128_Mul_I16, false) +WASM_BINARY_OPCODE(I8Mul, 0x121, M128X3, Simd128_Mul_I8, false) +WASM_BINARY_OPCODE(I4Mul, 0x122, M128X3, Simd128_Mul_I4, false) +WASM_UNARY__OPCODE(I16Neg, 0x123, M128_M128, Simd128_Neg_I16, false) +WASM_UNARY__OPCODE(I8Neg, 0x124, M128_M128, Simd128_Neg_I8, false) +WASM_UNARY__OPCODE(I4Neg, 0x125, M128_M128, Simd128_Neg_I4, false) +WASM_UNARY__OPCODE(I2Neg, 0x126, M128_M128, Simd128_Neg_I2, false) +WASM_BINARY_OPCODE(I16AddSaturateS, 0x127, M128X3, Simd128_AddSaturate_I16, false) +WASM_BINARY_OPCODE(I16AddSaturateU, 0x128, M128X3, Simd128_AddSaturate_U16, false) +WASM_BINARY_OPCODE(I8AddSaturateS, 0x129, M128X3, Simd128_AddSaturate_I8, false) +WASM_BINARY_OPCODE(I8AddSaturateU, 0x12a, M128X3, Simd128_AddSaturate_U8, false) +WASM_BINARY_OPCODE(I16SubSaturateS, 0x12b, M128X3, Simd128_SubSaturate_I16, false) +WASM_BINARY_OPCODE(I16SubSaturateU, 0x12c, M128X3, Simd128_SubSaturate_U16, false) +WASM_BINARY_OPCODE(I8SubSaturateS, 0x12d, M128X3, Simd128_SubSaturate_I8, false) +WASM_BINARY_OPCODE(I8SubSaturateU, 0x12e, M128X3, Simd128_SubSaturate_U8, false) +WASM_BINARY_OPCODE(I16Shl, 0x12f, M128_M128_I, Simd128_ShLtByScalar_I16, false) +WASM_BINARY_OPCODE(I8Shl, 0x130, M128_M128_I, Simd128_ShLtByScalar_I8, false) +WASM_BINARY_OPCODE(I4Shl, 0x131, M128_M128_I, Simd128_ShLtByScalar_I4, false) +WASM_BINARY_OPCODE(I2Shl, 0x132, M128_M128_I, Simd128_ShLtByScalar_I2, false) +WASM_BINARY_OPCODE(I16ShrS, 0x133, M128_M128_I, Simd128_ShRtByScalar_I16, false) +WASM_BINARY_OPCODE(I16ShrU, 0x134, M128_M128_I, Simd128_ShRtByScalar_U16, false) +WASM_BINARY_OPCODE(I8ShrS, 0x135, M128_M128_I, Simd128_ShRtByScalar_I8, false) 
+WASM_BINARY_OPCODE(I8ShrU, 0x136, M128_M128_I, Simd128_ShRtByScalar_U8, false) +WASM_BINARY_OPCODE(I4ShrS, 0x137, M128_M128_I, Simd128_ShRtByScalar_I4, false) +WASM_BINARY_OPCODE(I4ShrU, 0x138, M128_M128_I, Simd128_ShRtByScalar_U4, false) +WASM_BINARY_OPCODE(I2ShrS, 0x139, M128_M128_I, Simd128_ShRtByScalar_I2, false) +WASM_BINARY_OPCODE(I2ShrU, 0x13a, M128_M128_I, Simd128_ShRtByScalar_U2, false) +WASM_BINARY_OPCODE(M128And, 0x13b, M128X3, Simd128_And_I4, false) +WASM_BINARY_OPCODE(M128Or, 0x13c, M128X3, Simd128_Or_I4, false) +WASM_BINARY_OPCODE(M128Xor, 0x13d, M128X3, Simd128_Xor_I4, false) +WASM_UNARY__OPCODE(M128Not, 0x13e, M128_M128, Simd128_Not_I4, false) +WASM_MISC_OPCODE(M128Bitselect, 0x13f, Limit, false) +WASM_UNARY__OPCODE(I16AnyTrue, 0x140, I_M128, Simd128_AnyTrue_B16, false) +WASM_UNARY__OPCODE(I8AnyTrue, 0x141, I_M128, Simd128_AnyTrue_B8, false) +WASM_UNARY__OPCODE(I4AnyTrue, 0x142, I_M128, Simd128_AnyTrue_B4, false) +WASM_UNARY__OPCODE(I2AnyTrue, 0x143, I_M128, Simd128_AnyTrue_B2, false) +WASM_UNARY__OPCODE(I16AllTrue, 0x144, I_M128, Simd128_AllTrue_B16, false) +WASM_UNARY__OPCODE(I8AllTrue, 0x145, I_M128, Simd128_AllTrue_B8, false) +WASM_UNARY__OPCODE(I4AllTrue, 0x146, I_M128, Simd128_AllTrue_B4, false) +WASM_UNARY__OPCODE(I2AllTrue, 0x147, I_M128, Simd128_AllTrue_B2, false) +WASM_BINARY_OPCODE(I16Eq, 0x148, M128X3, Simd128_Eq_I16, false) +WASM_BINARY_OPCODE(I8Eq, 0x149, M128X3, Simd128_Eq_I8, false) +WASM_BINARY_OPCODE(I4Eq, 0x14a, M128X3, Simd128_Eq_I4, false) +WASM_BINARY_OPCODE(F4Eq, 0x14b, M128X3, Simd128_Eq_F4, false) +WASM_BINARY_OPCODE(F2Eq, 0x14c, M128X3, Simd128_Eq_D2, false) +WASM_BINARY_OPCODE(I16Ne, 0x14d, M128X3, Simd128_Neq_I16, false) +WASM_BINARY_OPCODE(I8Ne, 0x14e, M128X3, Simd128_Neq_I8, false) +WASM_BINARY_OPCODE(I4Ne, 0x14f, M128X3, Simd128_Neq_I4, false) +WASM_BINARY_OPCODE(F4Ne, 0x150, M128X3, Simd128_Neq_F4, false) +WASM_BINARY_OPCODE(F2Ne, 0x151, M128X3, Simd128_Neq_D2, false) +WASM_BINARY_OPCODE(I16LtS, 0x152, M128X3, 
Simd128_Lt_I16, false) +WASM_BINARY_OPCODE(I16LtU, 0x153, M128X3, Simd128_Lt_U16, false) +WASM_BINARY_OPCODE(I8LtS, 0x154, M128X3, Simd128_Lt_I8, false) +WASM_BINARY_OPCODE(I8LtU, 0x155, M128X3, Simd128_Lt_U8, false) +WASM_BINARY_OPCODE(I4LtS, 0x156, M128X3, Simd128_Lt_I4, false) +WASM_BINARY_OPCODE(I4LtU, 0x157, M128X3, Simd128_Lt_U4, false) +WASM_BINARY_OPCODE(F4Lt, 0x158, M128X3, Simd128_Lt_F4, false) +WASM_BINARY_OPCODE(F2Lt, 0x159, M128X3, Simd128_Lt_D2, false) +WASM_BINARY_OPCODE(I16LeS, 0x15a, M128X3, Simd128_LtEq_I16, false) +WASM_BINARY_OPCODE(I16LeU, 0x15b, M128X3, Simd128_LtEq_U16, false) +WASM_BINARY_OPCODE(I8LeS, 0x15c, M128X3, Simd128_LtEq_I8, false) +WASM_BINARY_OPCODE(I8LeU, 0x15d, M128X3, Simd128_LtEq_U8, false) +WASM_BINARY_OPCODE(I4LeS, 0x15e, M128X3, Simd128_LtEq_I4, false) +WASM_BINARY_OPCODE(I4LeU, 0x15f, M128X3, Simd128_LtEq_U4, false) +WASM_BINARY_OPCODE(F4Le, 0x160, M128X3, Simd128_LtEq_F4, false) +WASM_BINARY_OPCODE(F2Le, 0x161, M128X3, Simd128_LtEq_D2, false) +WASM_BINARY_OPCODE(I16GtS, 0x162, M128X3, Simd128_Gt_I16, false) +WASM_BINARY_OPCODE(I16GtU, 0x163, M128X3, Simd128_Gt_U16, false) +WASM_BINARY_OPCODE(I8GtS, 0x164, M128X3, Simd128_Gt_I8, false) +WASM_BINARY_OPCODE(I8GtU, 0x165, M128X3, Simd128_Gt_U8, false) +WASM_BINARY_OPCODE(I4GtS, 0x166, M128X3, Simd128_Gt_I4, false) +WASM_BINARY_OPCODE(I4GtU, 0x167, M128X3, Simd128_Gt_U4, false) +WASM_BINARY_OPCODE(F4Gt, 0x168, M128X3, Simd128_Gt_F4, false) +WASM_BINARY_OPCODE(F2Gt, 0x169, M128X3, Simd128_Gt_D2, false) +WASM_BINARY_OPCODE(I16GeS, 0x16a, M128X3, Simd128_GtEq_I16, false) +WASM_BINARY_OPCODE(I16GeU, 0x16b, M128X3, Simd128_GtEq_U16, false) +WASM_BINARY_OPCODE(I8GeS, 0x16c, M128X3, Simd128_GtEq_I8, false) +WASM_BINARY_OPCODE(I8GeU, 0x16d, M128X3, Simd128_GtEq_U8, false) +WASM_BINARY_OPCODE(I4GeS, 0x16e, M128X3, Simd128_GtEq_I4, false) +WASM_BINARY_OPCODE(I4GeU, 0x16f, M128X3, Simd128_GtEq_U4, false) +WASM_BINARY_OPCODE(F4Ge, 0x170, M128X3, Simd128_GtEq_F4, false) 
+WASM_BINARY_OPCODE(F2Ge, 0x171, M128X3, Simd128_GtEq_D2, false) +WASM_UNARY__OPCODE(F4Neg, 0x172, M128_M128, Simd128_Neg_F4, false) +WASM_UNARY__OPCODE(F2Neg, 0x173, M128_M128, Simd128_Neg_D2, false) +WASM_UNARY__OPCODE(F4Abs, 0x174, M128_M128, Simd128_Abs_F4, false) +WASM_UNARY__OPCODE(F2Abs, 0x175, M128_M128, Simd128_Abs_D2, false) +WASM_BINARY_OPCODE(F4Min, 0x176, M128X3, Simd128_Min_F4, false) +WASM_BINARY_OPCODE(F2Min, 0x177, M128X3, Simd128_Min_D2, false) +WASM_BINARY_OPCODE(F4Max, 0x178, M128X3, Simd128_Max_F4, false) +WASM_BINARY_OPCODE(F2Max, 0x179, M128X3, Simd128_Max_D2, false) +WASM_BINARY_OPCODE(F4Add, 0x17a, M128X3, Simd128_Add_F4, false) +WASM_BINARY_OPCODE(F2Add, 0x17b, M128X3, Simd128_Add_D2, false) +WASM_BINARY_OPCODE(F4Sub, 0x17c, M128X3, Simd128_Sub_F4, false) +WASM_BINARY_OPCODE(F2Sub, 0x17d, M128X3, Simd128_Sub_D2, false) +WASM_BINARY_OPCODE(F4Div, 0x17e, M128X3, Simd128_Div_F4, false) +WASM_BINARY_OPCODE(F2Div, 0x17f, M128X3, Simd128_Div_D2, false) +WASM_BINARY_OPCODE(F4Mul, 0x180, M128X3, Simd128_Mul_F4, false) +WASM_BINARY_OPCODE(F2Mul, 0x181, M128X3, Simd128_Mul_D2, false) +WASM_UNARY__OPCODE(F4Sqrt, 0x182, M128_M128, Simd128_Sqrt_F4, false) +WASM_UNARY__OPCODE(F2Sqrt, 0x183, M128_M128, Simd128_Sqrt_D2, false) +WASM_UNARY__OPCODE(F4ConvertS, 0x184, M128_M128, Simd128_FromInt32x4_F4, false) +WASM_UNARY__OPCODE(F4ConvertU, 0x185, M128_M128, Simd128_FromUint32x4_F4, false) +WASM_UNARY__OPCODE(F2ConvertS, 0x186, M128_M128, Simd128_FromInt64x2_D2, false) +WASM_UNARY__OPCODE(F2ConvertU, 0x187, M128_M128, Simd128_FromUint64x2_D2, false) +WASM_UNARY__OPCODE(I4TruncS, 0x188, M128_M128, Simd128_FromFloat32x4_I4, false) +WASM_UNARY__OPCODE(I4TruncU, 0x189, M128_M128, Simd128_FromFloat32x4_U4, false) +WASM_UNARY__OPCODE(I2TruncS, 0x18a, M128_M128, Simd128_FromFloat64x2_I2, false) +WASM_UNARY__OPCODE(I2TruncU, 0x18b, M128_M128, Simd128_FromFloat64x2_U2, false) + +#undef WASM_SIMD_BUILD_OPCODE +#undef WASM_LANE_OPCODE +#undef WASM_EXTRACTLANE_OPCODE 
+#undef WASM_SIMD_MEMREAD_OPCODE +#undef WASM_SIMD_MEMSTORE_OPCODE +#undef WASM_REPLACELANE_OPCODE diff --git a/lib/WasmReader/WasmBinaryReader.cpp b/lib/WasmReader/WasmBinaryReader.cpp index 90b6e7875f8..71b73e7f400 100644 --- a/lib/WasmReader/WasmBinaryReader.cpp +++ b/lib/WasmReader/WasmBinaryReader.cpp @@ -16,6 +16,7 @@ namespace Wasm namespace WasmTypes { + bool IsLocalType(WasmTypes::WasmType type) { // Check if type in range ]Void,Limit[ @@ -31,6 +32,9 @@ uint32 GetTypeByteSize(WasmType type) case I64: return sizeof(int64); case F32: return sizeof(float); case F64: return sizeof(double); + case M128: + CompileAssert(sizeof(Simd::simdvec) == 16); + return sizeof(Simd::simdvec); case Ptr: return sizeof(void*); default: Js::Throw::InternalError(); @@ -45,6 +49,7 @@ const char16 * GetTypeName(WasmType type) case WasmTypes::WasmType::I64: return _u("i64"); case WasmTypes::WasmType::F32: return _u("f32"); case WasmTypes::WasmType::F64: return _u("f64"); + case WasmTypes::WasmType::M128: return _u("m128"); case WasmTypes::WasmType::Any: return _u("any"); default: Assert(UNREACHED); break; } @@ -61,6 +66,7 @@ WasmTypes::WasmType LanguageTypes::ToWasmType(int8 binType) case LanguageTypes::i64: return WasmTypes::I64; case LanguageTypes::f32: return WasmTypes::F32; case LanguageTypes::f64: return WasmTypes::F64; + case LanguageTypes::m128: return WasmTypes::M128; default: throw WasmCompilationException(_u("Invalid binary type %d"), binType); } @@ -396,6 +402,19 @@ WasmOp WasmBinaryReader::ReadOpCode() WasmOp op = m_currentNode.op = (WasmOp)*m_pc++; ++m_funcState.count; + if (op == wbSimdStart) + { + if (!CONFIG_FLAG(WasmSimd)) + { + ThrowDecodingError(_u("WebAssembly SIMD support is not enabled")); + } + + uint32 len; + uint32 extOpCode = LEB128(len) + wbM128Const; + Assert((WasmOp)(extOpCode) == extOpCode); + op = (WasmOp)extOpCode; + m_funcState.count += len; + } return op; } @@ -450,6 +469,9 @@ WasmOp WasmBinaryReader::ReadExpr() case wbF64Const: ConstNode(); break; 
+ case wbM128Const: + ConstNode(); + break; case wbSetLocal: case wbGetLocal: case wbTeeLocal: @@ -477,10 +499,17 @@ WasmOp WasmBinaryReader::ReadExpr() } break; } + case wbV8X16Shuffle: + ShuffleNode(); + break; +#define WASM_LANE_OPCODE(opname, opcode, sig, nyi) \ + case wb##opname: \ + LaneNode(); \ + break; #define WASM_MEM_OPCODE(opname, opcode, sig, nyi) \ case wb##opname: \ MemNode(); \ - break; + break; #include "WasmBinaryOpCodes.h" default: break; @@ -592,6 +621,22 @@ void WasmBinaryReader::BrTableNode() m_funcState.count += len; } +void WasmBinaryReader::ShuffleNode() +{ + CheckBytesLeft(Simd::MAX_LANES); + for (uint32 i = 0; i < Simd::MAX_LANES; i++) + { + m_currentNode.shuffle.indices[i] = ReadConst(); + } + m_funcState.count += Simd::MAX_LANES; +} + +void WasmBinaryReader::LaneNode() +{ + m_currentNode.lane.index = ReadConst(); + m_funcState.count++; +} + void WasmBinaryReader::MemNode() { uint32 len = 0; @@ -640,6 +685,13 @@ void WasmBinaryReader::ConstNode() CompileAssert(sizeof(int64) == sizeof(double)); m_funcState.count += sizeof(double); break; + case WasmTypes::M128: + for (uint i = 0; i < Simd::VEC_WIDTH; i++) + { + m_currentNode.cnst.v128[i] = ReadConst(); + } + m_funcState.count += Simd::VEC_WIDTH; + break; } } diff --git a/lib/WasmReader/WasmBinaryReader.h b/lib/WasmReader/WasmBinaryReader.h index 835fb6d942e..a2ceeecbb45 100644 --- a/lib/WasmReader/WasmBinaryReader.h +++ b/lib/WasmReader/WasmBinaryReader.h @@ -14,6 +14,7 @@ namespace Wasm const int8 i64 = 0x80 - 0x2; const int8 f32 = 0x80 - 0x3; const int8 f64 = 0x80 - 0x4; + const int8 m128 = 0x80 - 0x5; const int8 anyfunc = 0x80 - 0x10; const int8 func = 0x80 - 0x20; const int8 emptyBlock = 0x80 - 0x40; @@ -54,6 +55,8 @@ namespace Wasm bool ProcessCurrentSection(); virtual void SeekToFunctionBody(class WasmFunctionInfo* funcInfo) override; virtual bool IsCurrentFunctionCompleted() const override; + + WasmOp ReadOpCode(); //@TODO might need to be moved into WasmReaderBase virtual WasmOp 
ReadExpr() override; virtual void FunctionEnd() override; virtual const uint32 EstimateCurrentFunctionBytecodeSize() const override; @@ -70,7 +73,6 @@ namespace Wasm uint32 count; // current entry uint32 size; // binary size of the function }; - WasmOp ReadOpCode(); void BlockNode(); void CallNode(); @@ -78,6 +80,8 @@ namespace Wasm void BrNode(); void BrTableNode(); void MemNode(); + void LaneNode(); + void ShuffleNode(); void VarNode(); // Module readers diff --git a/lib/WasmReader/WasmByteCodeGenerator.cpp b/lib/WasmReader/WasmByteCodeGenerator.cpp index 3219451ac96..0c8cf55bc4d 100644 --- a/lib/WasmReader/WasmByteCodeGenerator.cpp +++ b/lib/WasmReader/WasmByteCodeGenerator.cpp @@ -193,6 +193,7 @@ Js::AsmJsRetType WasmToAsmJs::GetAsmJsReturnType(WasmTypes::WasmType wasmType) case WasmTypes::F32: return Js::AsmJsRetType::Float; case WasmTypes::F64: return Js::AsmJsRetType::Double; case WasmTypes::Void: return Js::AsmJsRetType::Void; + case WasmTypes::M128: return Js::AsmJsRetType::Float32x4; default: throw WasmCompilationException(_u("Unknown return type %u"), wasmType); } @@ -208,6 +209,7 @@ Js::AsmJsVarType WasmToAsmJs::GetAsmJsVarType(WasmTypes::WasmType wasmType) case WasmTypes::I64: return Js::AsmJsVarType::Int64; case WasmTypes::F32: return Js::AsmJsVarType::Float; case WasmTypes::F64: return Js::AsmJsVarType::Double; + case WasmTypes::M128: return Js::AsmJsVarType::Float32x4; default: throw WasmCompilationException(_u("Unknown var type %u"), wasmType); } @@ -467,7 +469,7 @@ WasmBytecodeGenerator::WasmBytecodeGenerator(Js::ScriptContext* scriptContext, W m_scriptContext(scriptContext), m_alloc(_u("WasmBytecodeGen"), scriptContext->GetThreadContext()->GetPageAllocator(), Js::Throw::OutOfMemory), m_evalStack(&m_alloc), - mTypedRegisterAllocator(&m_alloc, AllocateRegisterSpace, 1 << WAsmJs::SIMD), + mTypedRegisterAllocator(&m_alloc, AllocateRegisterSpace, CONFIG_FLAG(WasmSimd) ? 
0 : 1 << WAsmJs::SIMD), m_blockInfos(&m_alloc), currentProfileId(0), isUnreachable(false) @@ -598,6 +600,11 @@ void WasmBytecodeGenerator::EnregisterLocals() case WasmTypes::I64: m_writer->AsmLong1Const1(Js::OpCodeAsmJs::Ld_LongConst, m_locals[i].location, 0); break; + case WasmTypes::M128: + { + m_writer->WasmSimdConst(Js::OpCodeAsmJs::Simd128_LdC, m_locals[i].location, 0, 0, 0, 0); + break; + } default: Assume(UNREACHED); } @@ -605,6 +612,43 @@ void WasmBytecodeGenerator::EnregisterLocals() } } +template +EmitInfo WasmBytecodeGenerator::EmitSimdBuildExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature) +{ + const WasmTypes::WasmType resultType = signature[0]; + const WasmTypes::WasmType type = signature[1]; + + Js::RegSlot resultReg = GetRegisterSpace(resultType)->AcquireTmpRegister(); + + EmitInfo args[lanes]; + for (uint i = 0; i < lanes; i++) + { + args[i] = PopEvalStack(type); + } + + switch (lanes) + { + case 4: + m_writer->AsmReg5(op, resultReg, args[3].location, args[2].location, args[1].location, args[0].location); + break; + case 8: + m_writer->AsmReg9(op, resultReg, args[7].location, args[6].location, args[5].location, args[4].location, args[3].location, args[2].location, args[1].location, args[0].location); + break; + case 16: + m_writer->AsmReg17(op, resultReg, args[15].location, args[14].location, args[13].location, args[12].location, args[11].location, args[10].location, args[9].location, args[8].location, args[7].location, args[6].location, args[5].location, args[4].location, args[3].location, args[2].location, args[1].location, args[0].location); + break; + default: + Assert(UNREACHED); + } + + for (uint i = 0; i < lanes; i++) + { + ReleaseLocation(&args[i]); + } + + return EmitInfo(resultReg, resultType); +} + void WasmBytecodeGenerator::EmitExpr(WasmOp op) { DebugPrintOp(op); @@ -652,6 +696,9 @@ void WasmBytecodeGenerator::EmitExpr(WasmOp op) case wbI64Const: info = EmitConst(WasmTypes::I64, GetReader()->m_currentNode.cnst); break; + 
case wbM128Const: + info = EmitConst(WasmTypes::M128, GetReader()->m_currentNode.cnst); + break; case wbBlock: info = EmitBlock(); break; @@ -708,6 +755,20 @@ void WasmBytecodeGenerator::EmitExpr(WasmOp op) SetUnreachableState(true); info.type = WasmTypes::Any; break; + case wbM128Bitselect: + info = EmitM128BitSelect(); + break; + case wbV8X16Shuffle: + info = EmitV8X16Shuffle(); + break; +#define WASM_EXTRACTLANE_OPCODE(opname, opcode, sig, asmjsop, nyi) \ + case wb##opname: \ + info = EmitExtractLaneExpr(Js::OpCodeAsmJs::##asmjsop, WasmOpCodeSignatures::sig); \ + break; +#define WASM_REPLACELANE_OPCODE(opname, opcode, sig, asmjsop, nyi) \ + case wb##opname: \ + info = EmitReplaceLaneExpr(Js::OpCodeAsmJs::##asmjsop, WasmOpCodeSignatures::sig); \ + break; #define WASM_MEMREAD_OPCODE(opname, opcode, sig, nyi, viewtype) \ case wb##opname: \ Assert(WasmOpCodeSignatures::n##sig > 0);\ @@ -718,6 +779,16 @@ void WasmBytecodeGenerator::EmitExpr(WasmOp op) Assert(WasmOpCodeSignatures::n##sig > 0);\ info = EmitMemAccess(wb##opname, WasmOpCodeSignatures::sig, viewtype, true); \ break; +#define WASM_SIMD_MEMREAD_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, nyi) \ + case wb##opname: \ + Assert(WasmOpCodeSignatures::n##sig > 0);\ + info = EmitSimdMemAccess(Js::OpCodeAsmJs::##asmjsop, WasmOpCodeSignatures::sig, viewtype, dataWidth, false); \ + break; +#define WASM_SIMD_MEMSTORE_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, nyi) \ + case wb##opname: \ + Assert(WasmOpCodeSignatures::n##sig > 0);\ + info = EmitSimdMemAccess(Js::OpCodeAsmJs::##asmjsop, WasmOpCodeSignatures::sig, viewtype, dataWidth, true); \ + break; #define WASM_BINARY_OPCODE(opname, opcode, sig, asmjsop, nyi) \ case wb##opname: \ Assert(WasmOpCodeSignatures::n##sig == 3);\ @@ -728,6 +799,11 @@ void WasmBytecodeGenerator::EmitExpr(WasmOp op) Assert(WasmOpCodeSignatures::n##sig == 2);\ info = EmitUnaryExpr(Js::OpCodeAsmJs::##asmjsop, WasmOpCodeSignatures::sig); \ break; +#define 
WASM_SIMD_BUILD_OPCODE(opname, opcode, sig, asmjop, lanes, nyi) \ + case wb##opname: \ + Assert(WasmOpCodeSignatures::n##sig == 2);\ + info = EmitSimdBuildExpr(Js::OpCodeAsmJs::##asmjop, WasmOpCodeSignatures::sig); \ + break; #define WASM_EMPTY__OPCODE(opname, opcode, asmjsop, nyi) \ case wb##opname: \ m_writer->EmptyAsm(Js::OpCodeAsmJs::##asmjsop);\ @@ -873,6 +949,11 @@ void WasmBytecodeGenerator::EmitLoadConst(EmitInfo dst, WasmConstLitNode cnst) case WasmTypes::I64: m_writer->AsmLong1Const1(Js::OpCodeAsmJs::Ld_LongConst, dst.location, cnst.i64); break; + case WasmTypes::M128: + { + m_writer->WasmSimdConst(Js::OpCodeAsmJs::Simd128_LdC, dst.location, cnst.v128[0], cnst.v128[1], cnst.v128[2], cnst.v128[3]); + break; + } default: throw WasmCompilationException(_u("Unknown type %u"), dst.type); } @@ -1080,6 +1161,9 @@ EmitInfo WasmBytecodeGenerator::EmitCall() case WasmTypes::I64: argOp = isImportCall ? Js::OpCodeAsmJs::ArgOut_Long : Js::OpCodeAsmJs::I_ArgOut_Long; break; + case WasmTypes::M128: + argOp = isImportCall ? 
Js::OpCodeAsmJs::Simd128_ArgOut_F4 : Js::OpCodeAsmJs::Simd128_I_ArgOut_F4; + break; case WasmTypes::Any: // In unreachable mode allow any type as argument since we won't actually emit the call Assert(IsUnreachable()); @@ -1320,8 +1404,8 @@ EmitInfo WasmBytecodeGenerator::EmitBinExpr(Js::OpCodeAsmJs op, const WasmTypes: WasmTypes::WasmType lhsType = signature[1]; WasmTypes::WasmType rhsType = signature[2]; - EmitInfo rhs = PopEvalStack(lhsType); - EmitInfo lhs = PopEvalStack(rhsType); + EmitInfo rhs = PopEvalStack(rhsType); + EmitInfo lhs = PopEvalStack(lhsType); ReleaseLocation(&rhs); ReleaseLocation(&lhs); @@ -1354,6 +1438,165 @@ EmitInfo WasmBytecodeGenerator::EmitUnaryExpr(Js::OpCodeAsmJs op, const WasmType return EmitInfo(resultReg, resultType); } +void WasmBytecodeGenerator::CheckLaneIndex(Js::OpCodeAsmJs op, const uint index) +{ + uint numLanes; + switch (op) + { + case Js::OpCodeAsmJs::Simd128_ExtractLane_I2: + case Js::OpCodeAsmJs::Simd128_ReplaceLane_I2: + case Js::OpCodeAsmJs::Simd128_ExtractLane_D2: + case Js::OpCodeAsmJs::Simd128_ReplaceLane_D2: + numLanes = 2; + break; + case Js::OpCodeAsmJs::Simd128_ExtractLane_I4: + case Js::OpCodeAsmJs::Simd128_ReplaceLane_I4: + case Js::OpCodeAsmJs::Simd128_ExtractLane_F4: + case Js::OpCodeAsmJs::Simd128_ReplaceLane_F4: + numLanes = 4; + break; + case Js::OpCodeAsmJs::Simd128_ExtractLane_I8: + case Js::OpCodeAsmJs::Simd128_ExtractLane_U8: + case Js::OpCodeAsmJs::Simd128_ReplaceLane_I8: + case Js::OpCodeAsmJs::Simd128_ReplaceLane_U8: + numLanes = 8; + break; + case Js::OpCodeAsmJs::Simd128_ExtractLane_I16: + case Js::OpCodeAsmJs::Simd128_ExtractLane_U16: + case Js::OpCodeAsmJs::Simd128_ReplaceLane_I16: + case Js::OpCodeAsmJs::Simd128_ReplaceLane_U16: + numLanes = 16; + break; + default: + Assert(UNREACHED); + numLanes = 0; + } + + if (index >= numLanes) + { + throw WasmCompilationException(_u("index is out of range")); + } +} + +EmitInfo WasmBytecodeGenerator::EmitLaneIndex(Js::OpCodeAsmJs op) +{ + const uint index 
= GetReader()->m_currentNode.lane.index; + CheckLaneIndex(op, index); + WasmConstLitNode dummy; + dummy.i32 = index; + return EmitConst(WasmTypes::I32, dummy); +} + +EmitInfo WasmBytecodeGenerator::EmitReplaceLaneExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature) { + + const WasmTypes::WasmType resultType = signature[0]; + const WasmTypes::WasmType valueType = signature[1]; + EmitInfo valueArg = PopEvalStack(valueType, _u("lane argument type mismatch")); + + EmitInfo simdArg = PopEvalStack(WasmTypes::M128, _u("simd argument type mismatch")); + Assert(resultType == WasmTypes::M128); + + EmitInfo indexInfo = EmitLaneIndex(op); + Js::RegSlot resultReg = GetRegisterSpace(resultType)->AcquireTmpRegister(); + EmitInfo result(resultReg, resultType); + + m_writer->AsmReg4(op, resultReg, simdArg.location, indexInfo.location, valueArg.location); + ReleaseLocation(&indexInfo); + return result; +} + +EmitInfo WasmBytecodeGenerator::EmitM128BitSelect() +{ + EmitInfo mask = PopEvalStack(WasmTypes::M128); + EmitInfo arg2Info = PopEvalStack(WasmTypes::M128); + EmitInfo arg1Info = PopEvalStack(WasmTypes::M128); + Js::RegSlot resultReg = GetRegisterSpace(WasmTypes::M128)->AcquireTmpRegister(); + EmitInfo resultInfo(resultReg, WasmTypes::M128); + m_writer->AsmReg4(Js::OpCodeAsmJs::Simd128_BitSelect_I4, resultReg, arg1Info.location, arg2Info.location, mask.location); + return resultInfo; +} + +EmitInfo WasmBytecodeGenerator::EmitV8X16Shuffle() +{ + EmitInfo arg2Info = PopEvalStack(WasmTypes::M128); + EmitInfo arg1Info = PopEvalStack(WasmTypes::M128); + + Js::RegSlot resultReg = GetRegisterSpace(WasmTypes::M128)->AcquireTmpRegister(); + EmitInfo resultInfo(resultReg, WasmTypes::M128); + + uint8* indices = GetReader()->m_currentNode.shuffle.indices; + for (uint i = 0; i < Simd::MAX_LANES; i++) + { + if (indices[i] >= Simd::MAX_LANES * 2) + { + throw WasmCompilationException(_u("%u-th shuffle lane index is larger than %u"), i, (Simd::MAX_LANES * 2 -1)); + } + } + + 
m_writer->AsmShuffle(Js::OpCodeAsmJs::Simd128_Shuffle_V8X16, resultReg, arg1Info.location, arg2Info.location, indices); + return resultInfo; +} + +EmitInfo WasmBytecodeGenerator::EmitExtractLaneExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature) +{ + WasmTypes::WasmType resultType = signature[0]; + WasmTypes::WasmType simdArgType = signature[1]; + + EmitInfo simdArgInfo = PopEvalStack(simdArgType, _u("Argument should be of type M128")); + + Js::RegSlot resultReg = GetRegisterSpace(resultType)->AcquireTmpRegister(); + EmitInfo resultInfo(resultReg, resultType); + + //put index into a register to reuse the existing infra in Interpreter and Compiler + EmitInfo indexInfo = EmitLaneIndex(op); + + m_writer->AsmReg3(op, resultReg, simdArgInfo.location, indexInfo.location); + ReleaseLocation(&indexInfo); + ReleaseLocation(&simdArgInfo); + return resultInfo; +} + +EmitInfo WasmBytecodeGenerator::EmitSimdMemAccess(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth, bool isStore) +{ + + WasmTypes::WasmType type = signature[0]; + SetUsesMemory(0); + + const uint32 mask = Js::ArrayBufferView::ViewMask[viewType]; + const uint alignment = GetReader()->m_currentNode.mem.alignment; + const uint offset = GetReader()->m_currentNode.mem.offset; + + if ((mask << 1) & (1 << alignment)) + { + throw WasmCompilationException(_u("alignment must not be larger than natural")); + } + + EmitInfo rhsInfo; + if (isStore) + { + rhsInfo = PopEvalStack(type, _u("Invalid type for store op")); + } + EmitInfo exprInfo = PopEvalStack(WasmTypes::I32, _u("Index expression must be of type i32")); + + if (isStore) + { + m_writer->AsmSimdTypedArr(op, rhsInfo.location, exprInfo.location, dataWidth, viewType, offset); + + ReleaseLocation(&rhsInfo); + ReleaseLocation(&exprInfo); + + return EmitInfo(); + } + + Js::RegSlot resultReg = GetRegisterSpace(type)->AcquireTmpRegister(); + m_writer->AsmSimdTypedArr(op, resultReg, exprInfo.location, 
dataWidth, viewType, offset); + + EmitInfo yieldInfo = EmitInfo(resultReg, type); + ReleaseLocation(&exprInfo); + + return yieldInfo; +} + EmitInfo WasmBytecodeGenerator::EmitMemAccess(WasmOp wasmOp, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, bool isStore) { WasmTypes::WasmType type = signature[0]; @@ -1385,7 +1628,7 @@ EmitInfo WasmBytecodeGenerator::EmitMemAccess(WasmOp wasmOp, const WasmTypes::Wa } ReleaseLocation(&exprInfo); - Js::RegSlot resultReg = GetRegisterSpace(type)->AcquireTmpRegister(); + Js::RegSlot resultReg = GetRegisterSpace(type)->AcquireTmpRegister(); m_writer->WasmMemAccess(Js::OpCodeAsmJs::LdArrWasm, resultReg, exprInfo.location, offset, viewType); EmitInfo yieldInfo; @@ -1512,6 +1755,8 @@ Js::OpCodeAsmJs WasmBytecodeGenerator::GetLoadOp(WasmTypes::WasmType wasmType) return Js::OpCodeAsmJs::Ld_Int; case WasmTypes::I64: return Js::OpCodeAsmJs::Ld_Long; + case WasmTypes::M128: + return Js::OpCodeAsmJs::Simd128_Ld_F4; case WasmTypes::Any: // In unreachable mode load the any type like an int since we won't actually emit the load Assert(IsUnreachable()); @@ -1541,6 +1786,9 @@ Js::OpCodeAsmJs WasmBytecodeGenerator::GetReturnOp(WasmTypes::WasmType type) case WasmTypes::I64: retOp = Js::OpCodeAsmJs::Return_Long; break; + case WasmTypes::M128: + retOp = Js::OpCodeAsmJs::Simd128_Return_F4; + break; case WasmTypes::Any: // In unreachable mode load the any type like an int since we won't actually emit the load Assert(IsUnreachable()); @@ -1653,9 +1901,11 @@ WasmRegisterSpace* WasmBytecodeGenerator::GetRegisterSpace(WasmTypes::WasmType t #if TARGET_64 case WasmTypes::Ptr: #endif - case WasmTypes::I64: return mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::INT64); - case WasmTypes::F32: return mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::FLOAT32); - case WasmTypes::F64: return mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::FLOAT64); + case WasmTypes::I64: return 
mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::INT64); + case WasmTypes::F32: return mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::FLOAT32); + case WasmTypes::F64: return mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::FLOAT64); + case WasmTypes::M128: return mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::SIMD); + default: return nullptr; } @@ -1711,7 +1961,7 @@ void WasmBytecodeGenerator::ExitEvalStackScope() Assert(!m_evalStack.Empty()); EmitInfo info = m_evalStack.Pop(); // It is possible to have unconsumed Any type left on the stack, simply remove them - while (info.type == WasmTypes::Any) + while (info.type == WasmTypes::Any) { Assert(!m_evalStack.Empty()); info = m_evalStack.Pop(); diff --git a/lib/WasmReader/WasmByteCodeGenerator.h b/lib/WasmReader/WasmByteCodeGenerator.h index f0ebfd4e493..02bc29cc383 100644 --- a/lib/WasmReader/WasmByteCodeGenerator.h +++ b/lib/WasmReader/WasmByteCodeGenerator.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------------------------------- -// Copyright (C) Microsoft. All rights reserved. +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
//------------------------------------------------------------------------------------------------------- #pragma once @@ -179,6 +179,8 @@ namespace Wasm private: void GenerateFunction(); + template + EmitInfo EmitSimdBuildExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature); void EmitExpr(WasmOp op); EmitInfo EmitBlock(); void EmitBlockCommon(BlockInfo* blockInfo, bool* endOnElse = nullptr); @@ -210,14 +212,21 @@ namespace Wasm EmitInfo EmitBrIf(); EmitInfo EmitMemAccess(WasmOp wasmOp, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, bool isStore); + EmitInfo EmitSimdMemAccess(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth, bool isStore); EmitInfo EmitBinExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature); EmitInfo EmitUnaryExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature); + EmitInfo EmitM128BitSelect(); + EmitInfo EmitV8X16Shuffle(); + EmitInfo EmitExtractLaneExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature); + EmitInfo EmitReplaceLaneExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature); + void CheckLaneIndex(Js::OpCodeAsmJs op, const uint index); + EmitInfo EmitLaneIndex(Js::OpCodeAsmJs op); EmitInfo EmitConst(WasmTypes::WasmType type, WasmConstLitNode cnst); void EmitLoadConst(EmitInfo dst, WasmConstLitNode cnst); WasmConstLitNode GetZeroCnst(); - void EnsureStackAvailable(); + void EnregisterLocals(); void ReleaseLocation(EmitInfo* info); diff --git a/lib/WasmReader/WasmParseTree.h b/lib/WasmReader/WasmParseTree.h index 85cb0319bc5..dfff3fb3235 100644 --- a/lib/WasmReader/WasmParseTree.h +++ b/lib/WasmReader/WasmParseTree.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------------------------------- -// Copyright (C) Microsoft. All rights reserved. +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. // Licensed under the MIT license. 
See LICENSE.txt file in the project root for full license information. //------------------------------------------------------------------------------------------------------- @@ -7,6 +7,13 @@ namespace Wasm { + const uint16 EXTENDED_OFFSET = 256; + namespace Simd { + const size_t VEC_WIDTH = 4; + typedef uint32 simdvec [VEC_WIDTH]; //TODO: maybe we should pull in SIMDValue? + const size_t MAX_LANES = 16; + } + namespace WasmTypes { enum WasmType @@ -17,10 +24,14 @@ namespace Wasm I64 = 2, F32 = 3, F64 = 4, + M128 = 5, Limit, Ptr, Any }; + + extern const char16* const strIds[Limit]; + bool IsLocalType(WasmTypes::WasmType type); uint32 GetTypeByteSize(WasmType type); const char16* GetTypeName(WasmType type); @@ -65,7 +76,7 @@ namespace Wasm #include "WasmBinaryOpCodes.h" }; - enum WasmOp : byte + enum WasmOp : uint16 { #define WASM_OPCODE(opname, opcode, sig, nyi) wb##opname = opcode, #include "WasmBinaryOpCodes.h" @@ -79,9 +90,20 @@ namespace Wasm double f64; int32 i32; int64 i64; + Simd::simdvec v128; }; }; + struct WasmShuffleNode + { + uint8 indices[Simd::MAX_LANES]; + }; + + struct WasmLaneNode + { + uint index; + }; + struct WasmVarNode { uint32 num; @@ -128,6 +150,8 @@ namespace Wasm WasmConstLitNode cnst; WasmMemOpNode mem; WasmVarNode var; + WasmLaneNode lane; + WasmShuffleNode shuffle; }; }; diff --git a/lib/WasmReader/WasmSignature.cpp b/lib/WasmReader/WasmSignature.cpp index 48ae3d81588..1011297832f 100644 --- a/lib/WasmReader/WasmSignature.cpp +++ b/lib/WasmReader/WasmSignature.cpp @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------------------------------- -// Copyright (C) Microsoft. All rights reserved. +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
//------------------------------------------------------------------------------------------------------- @@ -115,6 +115,10 @@ Js::ArgSlot WasmSignature::GetParamSize(Js::ArgSlot index) const CompileAssert(sizeof(double) == sizeof(int64)); return sizeof(int64); break; + case WasmTypes::M128: + CompileAssert(sizeof(Simd::simdvec) == 16); + return sizeof(Simd::simdvec); + break; default: throw WasmCompilationException(_u("Invalid param type")); } @@ -134,21 +138,24 @@ void WasmSignature::FinalizeSignature() } } - CompileAssert(Local::Limit - 1 <= 4); CompileAssert(Local::Void == 0); +#if 0 + CompileAssert(Local::Limit - 1 <= 4); + // 3 bits for result type, 2 for each arg // we don't need to reserve a sentinel bit because there is no result type with value of 7 - uint32 sigSize = ((uint32)paramCount) * 2 + 3; + uint32 sigSize = ((uint32)paramCount) * 3 + 3; if (sigSize <= sizeof(m_shortSig) << 3) { m_shortSig = (m_shortSig << 3) | m_resultType; for (Js::ArgSlot i = 0; i < paramCount; ++i) { // we can use 2 bits per arg by dropping void - m_shortSig = (m_shortSig << 2) | (m_params[i] - 1); + m_shortSig = (m_shortSig << 3) | (m_params[i] - 1); } } +#endif } Js::ArgSlot WasmSignature::GetParamsSize() const diff --git a/test/rlexedirs.xml b/test/rlexedirs.xml index 64f5d8619f4..31a0ad46d18 100644 --- a/test/rlexedirs.xml +++ b/test/rlexedirs.xml @@ -326,4 +326,10 @@ Scanner + + + wasm.simd + require_wasm,exclude_serialized,exclude_arm,exclude_arm64,require_backend,exclude_xplat + + diff --git a/test/wasm.simd/b16x8.wasm b/test/wasm.simd/b16x8.wasm new file mode 100755 index 00000000000..782889e6990 Binary files /dev/null and b/test/wasm.simd/b16x8.wasm differ diff --git a/test/wasm.simd/b16x8.wast b/test/wasm.simd/b16x8.wast new file mode 100755 index 00000000000..efaeec3e43d --- /dev/null +++ b/test/wasm.simd/b16x8.wast @@ -0,0 +1,183 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft 
Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. +;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_b16x8_0") + (result i32) + (local b16x8) + (set_local + 0 + (b16x8.build + (i32.const 1) + (i32.const 0) + (i32.const 2147483647) + (i32.const -2147483648) + (i32.const 5) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (b16x8.extractLane + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_b16x8_1") + (result i32) + (local b16x8) + (set_local + 0 + (b16x8.build + (i32.const 1) + (i32.const 0) + (i32.const 2147483647) + (i32.const -2147483648) + (i32.const 5) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (b16x8.extractLane + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_b16x8_2") + (result i32) + (local b16x8) + (set_local + 0 + (b16x8.build + (i32.const 1) + (i32.const 0) + (i32.const 2147483647) + (i32.const -2147483648) + (i32.const 5) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (b16x8.extractLane + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_b16x8_3") + (result i32) + (local b16x8) + (set_local + 0 + (b16x8.build + (i32.const 1) + (i32.const 0) + (i32.const 2147483647) + (i32.const -2147483648) + (i32.const 5) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (b16x8.extractLane + (get_local 0) + (i32.const 3) + ) + ) + (func + (export "func_b16x8_4") + (result i32) + (local b16x8) + (set_local + 0 + (b16x8.build + (i32.const 1) + (i32.const 0) + (i32.const 2147483647) + (i32.const -2147483648) + (i32.const 5) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (b16x8.extractLane + (get_local 0) + (i32.const 4) + ) + ) + (func + (export "func_b16x8_5") + (result i32) + (local b16x8) + (set_local + 0 + (b16x8.build + (i32.const 1) + (i32.const 0) + (i32.const 
2147483647) + (i32.const -2147483648) + (i32.const 5) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (b16x8.extractLane + (get_local 0) + (i32.const 5) + ) + ) + (func + (export "func_b16x8_6") + (result i32) + (local b16x8) + (set_local + 0 + (b16x8.build + (i32.const 1) + (i32.const 0) + (i32.const 2147483647) + (i32.const -2147483648) + (i32.const 5) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (b16x8.extractLane + (get_local 0) + (i32.const 6) + ) + ) + (func + (export "func_b16x8_7") + (result i32) + (local b16x8) + (set_local + 0 + (b16x8.build + (i32.const 1) + (i32.const 0) + (i32.const 2147483647) + (i32.const -2147483648) + (i32.const 5) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (b16x8.extractLane + (get_local 0) + (i32.const 7) + ) + ) +) diff --git a/test/wasm.simd/b32x4.wasm b/test/wasm.simd/b32x4.wasm new file mode 100755 index 00000000000..16f0acdb79d Binary files /dev/null and b/test/wasm.simd/b32x4.wasm differ diff --git a/test/wasm.simd/b32x4.wast b/test/wasm.simd/b32x4.wast new file mode 100755 index 00000000000..1e829bbf2db --- /dev/null +++ b/test/wasm.simd/b32x4.wast @@ -0,0 +1,79 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_b32x4_0") + (result i32) + (local b32x4) + (set_local + 0 + (b32x4.build + (i32.const 0) + (i32.const 1) + (i32.const -2147483648) + (i32.const 2147483647) + ) + ) + (b32x4.extractLane + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_b32x4_1") + (result i32) + (local b32x4) + (set_local + 0 + (b32x4.build + (i32.const 0) + (i32.const 1) + (i32.const -2147483648) + (i32.const 2147483647) + ) + ) + (b32x4.extractLane + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_b32x4_2") + (result i32) + (local b32x4) + (set_local + 0 + (b32x4.build + (i32.const 0) + (i32.const 1) + (i32.const -2147483648) + (i32.const 2147483647) + ) + ) + (b32x4.extractLane + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_b32x4_3") + (result i32) + (local b32x4) + (set_local + 0 + (b32x4.build + (i32.const 0) + (i32.const 1) + (i32.const -2147483648) + (i32.const 2147483647) + ) + ) + (b32x4.extractLane + (get_local 0) + (i32.const 3) + ) + ) +) diff --git a/test/wasm.simd/b8x16.wasm b/test/wasm.simd/b8x16.wasm new file mode 100755 index 00000000000..3a93652352a Binary files /dev/null and b/test/wasm.simd/b8x16.wasm differ diff --git a/test/wasm.simd/b8x16.wast b/test/wasm.simd/b8x16.wast new file mode 100755 index 00000000000..fbf5c2d673a --- /dev/null +++ b/test/wasm.simd/b8x16.wast @@ -0,0 +1,487 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_b8x16_0") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_b8x16_1") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_b8x16_2") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_b8x16_3") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 3) + ) + ) + (func + (export "func_b8x16_4") + (result i32) + (local b8x16) 
+ (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 4) + ) + ) + (func + (export "func_b8x16_5") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 5) + ) + ) + (func + (export "func_b8x16_6") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 6) + ) + ) + (func + (export "func_b8x16_7") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 7) + ) + ) + (func + (export "func_b8x16_8") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 
9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 8) + ) + ) + (func + (export "func_b8x16_9") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 9) + ) + ) + (func + (export "func_b8x16_10") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 10) + ) + ) + (func + (export "func_b8x16_11") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 11) + ) + ) + (func + (export "func_b8x16_12") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 
0) + (i32.const 12) + ) + ) + (func + (export "func_b8x16_13") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 13) + ) + ) + (func + (export "func_b8x16_14") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 14) + ) + ) + (func + (export "func_b8x16_15") + (result i32) + (local b8x16) + (set_local + 0 + (b8x16.build + (i32.const 1) + (i32.const 0) + (i32.const 3) + (i32.const -4) + (i32.const 5) + (i32.const 6) + (i32.const -2147483648) + (i32.const 8) + (i32.const 9) + (i32.const 65535) + (i32.const 11) + (i32.const 12) + (i32.const 65534) + (i32.const 14) + (i32.const 2147483647) + (i32.const 16) + ) + ) + (b8x16.extractLane + (get_local 0) + (i32.const 15) + ) + ) +) diff --git a/test/wasm.simd/binaryArithmeticTests.js b/test/wasm.simd/binaryArithmeticTests.js new file mode 100755 index 00000000000..07d93e89e7c --- /dev/null +++ b/test/wasm.simd/binaryArithmeticTests.js @@ -0,0 +1,374 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//------------------------------------------------------------------------------------------------------- +let passed = true; + +function check(expected, funName, args) { + checkInternal(expected, (a, b) => a == b, funName, args); +} + +function checkNaN(funName, args) { + checkInternal(Number.NaN, (a, b) => Number.isNaN(a), funName, args); +} + +function checkInternal(expected, eq, funName, args) { + let fun = eval(funName); + var result; + try { + result = fun(...args); + } catch (e) { + print(e); + result = e.message; + } + + if (!eq(result, expected)) { + passed = false; + print(`${funName}(${[...args]}) produced ${result}, expected ${expected}`); + } +} + +let ffi = {}; +var ii; +var mod; +var exports; + +mod = new WebAssembly.Module(readbuffer('binaryArithmeticTests.wasm')); +exports = new WebAssembly.Instance(mod, ffi).exports; + +tests = [ + //func_i32x4_add_3 + { + func: "func_i32x4_add_3", + args: [128, 128, 128, 128, 2, 2, 2, 2], + expected: 130 + }, + { + func: "func_i32x4_add_3", + args: [-1, -1, -1, -1, 2, 2, 2, 2], + expected: 1 + }, + //func_i32x4_sub_3 + { + func: "func_i32x4_sub_3", + args: [128, 128, 128, 128, 28, 28, 28, 28], + expected: 100 + }, + { + func: "func_i32x4_sub_3", + args: [-2147483648, -2147483648, -2147483648, -2147483648, 1, 1, 1, 1], + expected: 2147483647 + }, + //func_i32x4_mul_3 + { + func: "func_i32x4_mul_3", + args: [55, 55, 55, 55, 2, 2, 2, 2], + expected: 110 + }, + { + func: "func_i32x4_mul_3", + args: [2147483647, 2147483647, 2147483647, 2147483647, 2, 2, 2, 2], + expected: -2 + }, + //func_i32x4_shl_3 + { + func: "func_i32x4_shl_3", + args: [1, 1, 1, 1, 31, 31, 31, 31], + expected: -2147483648 + }, + { + func: "func_i32x4_shl_3", + args: [1, 1, 1, 1, 32, 32, 32, 32], + expected: 1 + }, + //func_i32x4_shr_3_u + { + func: "func_i32x4_shr_3_u", + args: [-2147483648, -2147483648, -2147483648, -2147483648, 31, 31, 31, 31], + expected: 1 + }, + { + func: "func_i32x4_shr_3_u", + args: [-2147483648, -2147483648, -2147483648, 
-2147483648, 32, 32, 32, 32], + expected: -2147483648 + }, + //func_i32x4_shr_3_s + { + func: "func_i32x4_shr_3_s", + args: [-2147483648, -2147483648, -2147483648, -2147483648, 1, 1, 1, 1], + expected: -1073741824 + }, + { + func: "func_i32x4_shr_3_s", + args: [-2147483648, -2147483648, -2147483648, -2147483648, 31, 31, 31, 31], + expected: -1 + }, + //func_i16x8_add_3_u + { + func: "func_i16x8_add_3_u", + args: [128, 128, 128, 128, 128, 128, 128, 128, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 130 + }, + { + func: "func_i16x8_add_3_u", + args: [65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 1 + }, + // (export "func_i16x8_addsaturate_3_s_u") + { + func: "func_i16x8_addsaturate_3_s_u", + args: [128, 128, 128, 128, 128, 128, 128, 128, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 130 + }, + { + func: "func_i16x8_addsaturate_3_s_u", + args: [32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 32767 + }, + //func_i16x8_sub_3_u + { + func: "func_i16x8_sub_3_u", + args: [128, 128, 128, 128, 128, 128, 128, 128, 28, 28, 28, 28, 28, 28, 28, 28], + expected: 100 + }, + { + func: "func_i16x8_sub_3_u", + args: [-32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, 1, 1, 1, 1, 1, 1, 1, 1], + expected: 32767 + }, + //func_i16x8_subsaturate_3_s_u + { + func: "func_i16x8_subsaturate_3_s_u", + args: [128, 128, 128, 128, 128, 128, 128, 128, 28, 28, 28, 28, 28, 28, 28, 28], + expected: 100 + }, + { + func: "func_i16x8_subsaturate_3_s_u", + args: [-32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, 1, 1, 1, 1, 1, 1, 1, 1], + expected: 32768 + }, + //func_i16x8_mul_3_u + { + func: "func_i16x8_mul_3_u", + args: [55, 55, 55, 55, 55, 55, 55, 55, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 110 + }, + { + func: "func_i16x8_mul_3_u", + args: [32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 65534 + }, + //func_i16x8_shl_3 + { + func: "func_i16x8_shl_3_u", + 
args: [1, 1, 1, 1, 1, 1, 1, 1, 15, 15, 15, 15, 15, 15, 15, 15], + expected: 32768 + }, + { + func: "func_i16x8_shl_3_u", + args: [1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, 16, 16, 16], + expected: 1 + }, + //func_i16x8_shr_3_u + { + func: "func_i16x8_shr_3_u_u", + args: [-32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, 15, 15, 15, 15, 15, 15, 15, 15], + expected: 1 + }, + { + func: "func_i16x8_shr_3_u_u", + args: [-32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, 16, 16, 16, 16, 16, 16, 16, 16], + expected: 32768 + }, + //func_i16x8_shr_3_s + { + func: "func_i16x8_shr_3_s_u", + args: [-32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, 1, 1, 1, 1, 1, 1, 1, 1], + expected: 49152 + }, + { + func: "func_i16x8_shr_3_s_u", + args: [-32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, 15, 15, 15, 15, 15, 15, 15, 15], + expected: 65535 + }, + //func_i8x16_add_3_u + { + func: "func_i8x16_add_3_u", + args: [128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 130 + }, + { + func: "func_i8x16_add_3_u", + args: [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 1 + }, + //func_i8x16_addsaturate_3_s_u + { + func: "func_i8x16_addsaturate_3_s_u", + args: [128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 130 + }, + { + func: "func_i8x16_addsaturate_3_s_u", + args: [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 1 + }, + //func_i8x16_sub_3_u + { + func: "func_i8x16_sub_3_u", + args: [128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28], + expected: 100 + }, + { + func: 
"func_i8x16_sub_3_u", + args: [-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + expected: 127 + }, + //func_i8x16_subsaturate_3_s_u + { + func: "func_i8x16_subsaturate_3_s_u", + args: [118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28], + expected: 90 + }, + { + func: "func_i8x16_subsaturate_3_s_u", + args: [-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + expected: 128 + }, + //func_i8x16_mul_3_u + { + func: "func_i8x16_mul_3_u", + args: [55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 110 + }, + { + func: "func_i8x16_mul_3_u", + args: [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], + expected: 254 + }, + //func_i8x16_shl_3 + { + func: "func_i8x16_shl_3_u", + args: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], + expected: 128 + }, + { + func: "func_i8x16_shl_3_u", + args: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], + expected: 1 + }, + //func_i8x16_shr_3_u + { + func: "func_i8x16_shr_3_u_u", + args: [128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], + expected: 1 + }, + { + func: "func_i8x16_shr_3_u_u", + args: [-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], + expected: 128 + }, + //func_i8x16_shr_3_s + { + func: "func_i8x16_shr_3_s_u", + args: [-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + expected: 192 + }, + { + func: "func_i8x16_shr_3_s_u", + args: [-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], + expected: 255 + }, + //func_f32x4_add_3 + { + func: "func_f32x4_add_3", + args: [128, 128, 128, 128, 2, 2, 2, 2], + expected: 130 + }, + { + func: "func_f32x4_add_3", + args: [1073741824, 1073741824, 1073741824, 1073741824, 2, 2, 2, 2], + expected: 1073741824 + }, + //func_f32x4_sub_3 + { + func: "func_f32x4_sub_3", + args: [128, 128, 128, 128, 28, 28, 28, 28], + expected: 100 + }, + { + func: "func_f32x4_sub_3", + args: [-1073741824, -1073741824, -1073741824, -1073741824, 1, 1, 1, 1], + expected: -1073741824 + }, + //func_f32x4_mul_3 + { + func: "func_f32x4_mul_3", + args: [55, 55, 55, 55, 2, 2, 2, 2], + expected: 110 + }, + { + func: "func_f32x4_mul_3", + args: [1073741824, 1073741824, 1073741824, 1073741824, 16, 16, 16, 16], + expected: 17179869184 + }, + //func_f32x4_div_3 + { + func: "func_f32x4_div_3", + args: [110, 110, 110, 110, 2, 2, 2, 2], + expected: 55 + }, + { + func: "func_f32x4_div_3", + args: [1073741824, 1073741824, 1073741824, 1073741824, 16, 16, 16, 16], + expected: 67108864 + }, + //func_f32x4_min_3 + { + func: "func_f32x4_min_3", + args: [-10, -10, -10, -10, 5, 5, 5, 5], + expected: -10 + }, + //func_f32x4_max_3 + { + func: "func_f32x4_max_3", + args: [-10, -10, -10, -10, 5, 5, 5, 5], + expected: 5 + }, +]; + +for (let t of tests) { + check(t.expected, "exports." 
+ t.func, t.args); +} + +//NaN Tests +nanTests = [ + //func_f32x4_min_3 + { + func: "func_f32x4_min_3", + args: [Number.NaN, Number.NaN, Number.NaN, Number.NaN, 2, 2, 2, 2] + }, + { + func: "func_f32x4_min_3", + args: [2, 2, 2, 2, Number.NaN, Number.NaN, Number.NaN, Number.NaN] + }, + //func_f32x4_max_3 + { + func: "func_f32x4_max_3", + args: [Number.NaN, Number.NaN, Number.NaN, Number.NaN, 2, 2, 2, 2] + }, + { + func: "func_f32x4_max_3", + args: [2, 2, 2, 2, Number.NaN, Number.NaN, Number.NaN, Number.NaN] + }, + +]; + +for (let t of nanTests) { + checkNaN("exports." + t.func, t.args); +} + + +if (passed) { + print("Passed"); +} diff --git a/test/wasm.simd/binaryArithmeticTests.wasm b/test/wasm.simd/binaryArithmeticTests.wasm new file mode 100755 index 00000000000..6609846c194 Binary files /dev/null and b/test/wasm.simd/binaryArithmeticTests.wasm differ diff --git a/test/wasm.simd/binaryArithmeticTests.wast b/test/wasm.simd/binaryArithmeticTests.wast new file mode 100755 index 00000000000..f4e1e2fe3ea --- /dev/null +++ b/test/wasm.simd/binaryArithmeticTests.wast @@ -0,0 +1,1318 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;-------------------------------------------------------------------------------------------------------
+
+;; Test module for SIMD binary arithmetic. Every export follows one of two shapes:
+;;   * two-vector ops: the first half of the parameters fills vector A, the second half fills
+;;     vector B, the operation is applied to (A, B), and lane 3 of the result is returned;
+;;   * shifts: the leading parameters fill one vector, the last parameter is the scalar shift
+;;     count, and lane 3 of the shifted vector is returned.
+;; i16x8 and i8x16 exports read the lane back with extractLane_u.
+(module
+  ;; ---- i32x4: add / sub / mul ----
+  (func (export "func_i32x4_add_3")
+    (param i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i32x4 i32x4 i32x4)
+    (set_local 8 (i32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (i32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (i32x4.add (get_local 8) (get_local 9)))
+    (i32x4.extractLane (get_local 10) (i32.const 3))
+  )
+  (func (export "func_i32x4_sub_3")
+    (param i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i32x4 i32x4 i32x4)
+    (set_local 8 (i32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (i32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (i32x4.sub (get_local 8) (get_local 9)))
+    (i32x4.extractLane (get_local 10) (i32.const 3))
+  )
+  (func (export "func_i32x4_mul_3")
+    (param i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i32x4 i32x4 i32x4)
+    (set_local 8 (i32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (i32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (i32x4.mul (get_local 8) (get_local 9)))
+    (i32x4.extractLane (get_local 10) (i32.const 3))
+  )
+  ;; ---- i32x4: shifts (parameter 4 is the shift count) ----
+  (func (export "func_i32x4_shl_3")
+    (param i32 i32 i32 i32 i32)
+    (result i32)
+    (local i32x4)
+    (set_local 5 (i32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 5 (i32x4.shl (get_local 5) (get_local 4)))
+    (i32x4.extractLane (get_local 5) (i32.const 3))
+  )
+  (func (export "func_i32x4_shr_3_s")
+    (param i32 i32 i32 i32 i32)
+    (result i32)
+    (local i32x4)
+    (set_local 5 (i32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 5 (i32x4.shr_s (get_local 5) (get_local 4)))
+    (i32x4.extractLane (get_local 5) (i32.const 3))
+  )
+  (func (export "func_i32x4_shr_3_u")
+    (param i32 i32 i32 i32 i32)
+    (result i32)
+    (local i32x4)
+    (set_local 5 (i32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 5 (i32x4.shr_u (get_local 5) (get_local 4)))
+    (i32x4.extractLane (get_local 5) (i32.const 3))
+  )
+  ;; ---- i16x8: add / addsaturate / sub / subsaturate / mul ----
+  (func (export "func_i16x8_add_3_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8 i16x8 i16x8)
+    (set_local 16 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 17 (i16x8.build (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 18 (i16x8.add (get_local 16) (get_local 17)))
+    (i16x8.extractLane_u (get_local 18) (i32.const 3))
+  )
+  (func (export "func_i16x8_addsaturate_3_s_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8 i16x8 i16x8)
+    (set_local 16 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 17 (i16x8.build (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 18 (i16x8.addsaturate_s (get_local 16) (get_local 17)))
+    (i16x8.extractLane_u (get_local 18) (i32.const 3))
+  )
+  (func (export "func_i16x8_addsaturate_3_u_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8 i16x8 i16x8)
+    (set_local 16 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 17 (i16x8.build (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 18 (i16x8.addsaturate_u (get_local 16) (get_local 17)))
+    (i16x8.extractLane_u (get_local 18) (i32.const 3))
+  )
+  (func (export "func_i16x8_sub_3_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8 i16x8 i16x8)
+    (set_local 16 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 17 (i16x8.build (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 18 (i16x8.sub (get_local 16) (get_local 17)))
+    (i16x8.extractLane_u (get_local 18) (i32.const 3))
+  )
+  (func (export "func_i16x8_subsaturate_3_s_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8 i16x8 i16x8)
+    (set_local 16 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 17 (i16x8.build (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 18 (i16x8.subsaturate_s (get_local 16) (get_local 17)))
+    (i16x8.extractLane_u (get_local 18) (i32.const 3))
+  )
+  (func (export "func_i16x8_subsaturate_3_u_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8 i16x8 i16x8)
+    (set_local 16 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 17 (i16x8.build (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 18 (i16x8.subsaturate_u (get_local 16) (get_local 17)))
+    (i16x8.extractLane_u (get_local 18) (i32.const 3))
+  )
+  (func (export "func_i16x8_mul_3_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8 i16x8 i16x8)
+    (set_local 16 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 17 (i16x8.build (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 18 (i16x8.mul (get_local 16) (get_local 17)))
+    (i16x8.extractLane_u (get_local 18) (i32.const 3))
+  )
+  ;; ---- i16x8: shifts (parameter 8 is the shift count) ----
+  (func (export "func_i16x8_shl_3_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8)
+    (set_local 9 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 9 (i16x8.shl (get_local 9) (get_local 8)))
+    (i16x8.extractLane_u (get_local 9) (i32.const 3))
+  )
+  (func (export "func_i16x8_shr_3_s_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8)
+    (set_local 9 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 9 (i16x8.shr_s (get_local 9) (get_local 8)))
+    (i16x8.extractLane_u (get_local 9) (i32.const 3))
+  )
+  (func (export "func_i16x8_shr_3_u_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i16x8)
+    (set_local 9 (i16x8.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 9 (i16x8.shr_u (get_local 9) (get_local 8)))
+    (i16x8.extractLane_u (get_local 9) (i32.const 3))
+  )
+  ;; ---- i8x16: add / addsaturate / sub / subsaturate / mul ----
+  (func (export "func_i8x16_add_3_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16 i8x16 i8x16)
+    (set_local 32 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 33 (i8x16.build (get_local 16) (get_local 17) (get_local 18) (get_local 19) (get_local 20) (get_local 21) (get_local 22) (get_local 23)
+      (get_local 24) (get_local 25) (get_local 26) (get_local 27) (get_local 28) (get_local 29) (get_local 30) (get_local 31)))
+    (set_local 34 (i8x16.add (get_local 32) (get_local 33)))
+    (i8x16.extractLane_u (get_local 34) (i32.const 3))
+  )
+  (func (export "func_i8x16_addsaturate_3_s_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16 i8x16 i8x16)
+    (set_local 32 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 33 (i8x16.build (get_local 16) (get_local 17) (get_local 18) (get_local 19) (get_local 20) (get_local 21) (get_local 22) (get_local 23)
+      (get_local 24) (get_local 25) (get_local 26) (get_local 27) (get_local 28) (get_local 29) (get_local 30) (get_local 31)))
+    (set_local 34 (i8x16.addsaturate_s (get_local 32) (get_local 33)))
+    (i8x16.extractLane_u (get_local 34) (i32.const 3))
+  )
+  (func (export "func_i8x16_addsaturate_3_u_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16 i8x16 i8x16)
+    (set_local 32 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 33 (i8x16.build (get_local 16) (get_local 17) (get_local 18) (get_local 19) (get_local 20) (get_local 21) (get_local 22) (get_local 23)
+      (get_local 24) (get_local 25) (get_local 26) (get_local 27) (get_local 28) (get_local 29) (get_local 30) (get_local 31)))
+    (set_local 34 (i8x16.addsaturate_u (get_local 32) (get_local 33)))
+    (i8x16.extractLane_u (get_local 34) (i32.const 3))
+  )
+  (func (export "func_i8x16_sub_3_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16 i8x16 i8x16)
+    (set_local 32 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 33 (i8x16.build (get_local 16) (get_local 17) (get_local 18) (get_local 19) (get_local 20) (get_local 21) (get_local 22) (get_local 23)
+      (get_local 24) (get_local 25) (get_local 26) (get_local 27) (get_local 28) (get_local 29) (get_local 30) (get_local 31)))
+    (set_local 34 (i8x16.sub (get_local 32) (get_local 33)))
+    (i8x16.extractLane_u (get_local 34) (i32.const 3))
+  )
+  (func (export "func_i8x16_subsaturate_3_s_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16 i8x16 i8x16)
+    (set_local 32 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 33 (i8x16.build (get_local 16) (get_local 17) (get_local 18) (get_local 19) (get_local 20) (get_local 21) (get_local 22) (get_local 23)
+      (get_local 24) (get_local 25) (get_local 26) (get_local 27) (get_local 28) (get_local 29) (get_local 30) (get_local 31)))
+    (set_local 34 (i8x16.subsaturate_s (get_local 32) (get_local 33)))
+    (i8x16.extractLane_u (get_local 34) (i32.const 3))
+  )
+  (func (export "func_i8x16_subsaturate_3_u_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16 i8x16 i8x16)
+    (set_local 32 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 33 (i8x16.build (get_local 16) (get_local 17) (get_local 18) (get_local 19) (get_local 20) (get_local 21) (get_local 22) (get_local 23)
+      (get_local 24) (get_local 25) (get_local 26) (get_local 27) (get_local 28) (get_local 29) (get_local 30) (get_local 31)))
+    (set_local 34 (i8x16.subsaturate_u (get_local 32) (get_local 33)))
+    (i8x16.extractLane_u (get_local 34) (i32.const 3))
+  )
+  (func (export "func_i8x16_mul_3_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16 i8x16 i8x16)
+    (set_local 32 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 33 (i8x16.build (get_local 16) (get_local 17) (get_local 18) (get_local 19) (get_local 20) (get_local 21) (get_local 22) (get_local 23)
+      (get_local 24) (get_local 25) (get_local 26) (get_local 27) (get_local 28) (get_local 29) (get_local 30) (get_local 31)))
+    (set_local 34 (i8x16.mul (get_local 32) (get_local 33)))
+    (i8x16.extractLane_u (get_local 34) (i32.const 3))
+  )
+  ;; ---- i8x16: shifts (parameter 16 is the shift count) ----
+  (func (export "func_i8x16_shl_3_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16)
+    (set_local 17 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 17 (i8x16.shl (get_local 17) (get_local 16)))
+    (i8x16.extractLane_u (get_local 17) (i32.const 3))
+  )
+  (func (export "func_i8x16_shr_3_s_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16)
+    (set_local 17 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 17 (i8x16.shr_s (get_local 17) (get_local 16)))
+    (i8x16.extractLane_u (get_local 17) (i32.const 3))
+  )
+  (func (export "func_i8x16_shr_3_u_u")
+    (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32 i32)
+    (result i32)
+    (local i8x16)
+    (set_local 17 (i8x16.build (get_local 0) (get_local 1) (get_local 2) (get_local 3) (get_local 4) (get_local 5) (get_local 6) (get_local 7)
+      (get_local 8) (get_local 9) (get_local 10) (get_local 11) (get_local 12) (get_local 13) (get_local 14) (get_local 15)))
+    (set_local 17 (i8x16.shr_u (get_local 17) (get_local 16)))
+    (i8x16.extractLane_u (get_local 17) (i32.const 3))
+  )
+  ;; ---- f32x4: add / sub / mul / div / min / max ----
+  (func (export "func_f32x4_add_3")
+    (param f32 f32 f32 f32 f32 f32 f32 f32)
+    (result f32)
+    (local f32x4 f32x4 f32x4)
+    (set_local 8 (f32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (f32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (f32x4.add (get_local 8) (get_local 9)))
+    (f32x4.extractLane (get_local 10) (i32.const 3))
+  )
+  (func (export "func_f32x4_sub_3")
+    (param f32 f32 f32 f32 f32 f32 f32 f32)
+    (result f32)
+    (local f32x4 f32x4 f32x4)
+    (set_local 8 (f32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (f32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (f32x4.sub (get_local 8) (get_local 9)))
+    (f32x4.extractLane (get_local 10) (i32.const 3))
+  )
+  (func (export "func_f32x4_mul_3")
+    (param f32 f32 f32 f32 f32 f32 f32 f32)
+    (result f32)
+    (local f32x4 f32x4 f32x4)
+    (set_local 8 (f32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (f32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (f32x4.mul (get_local 8) (get_local 9)))
+    (f32x4.extractLane (get_local 10) (i32.const 3))
+  )
+  (func (export "func_f32x4_div_3")
+    (param f32 f32 f32 f32 f32 f32 f32 f32)
+    (result f32)
+    (local f32x4 f32x4 f32x4)
+    (set_local 8 (f32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (f32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (f32x4.div (get_local 8) (get_local 9)))
+    (f32x4.extractLane (get_local 10) (i32.const 3))
+  )
+  (func (export "func_f32x4_min_3")
+    (param f32 f32 f32 f32 f32 f32 f32 f32)
+    (result f32)
+    (local f32x4 f32x4 f32x4)
+    (set_local 8 (f32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (f32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (f32x4.min (get_local 8) (get_local 9)))
+    (f32x4.extractLane (get_local 10) (i32.const 3))
+  )
+  (func (export "func_f32x4_max_3")
+    (param f32 f32 f32 f32 f32 f32 f32 f32)
+    (result f32)
+    (local f32x4 f32x4 f32x4)
+    (set_local 8 (f32x4.build (get_local 0) (get_local 1) (get_local 2) (get_local 3)))
+    (set_local 9 (f32x4.build (get_local 4) (get_local 5) (get_local 6) (get_local 7)))
+    (set_local 10 (f32x4.max (get_local 8) (get_local 9)))
+    (f32x4.extractLane (get_local 10) (i32.const 3))
+  )
+)
diff --git a/test/wasm.simd/buildExtractTests.js b/test/wasm.simd/buildExtractTests.js
new file mode 100755
index 00000000000..e920ba0bbc4
--- /dev/null
+++
b/test/wasm.simd/buildExtractTests.js
@@ -0,0 +1,183 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+// Loads one wasm module per SIMD lane type and checks the value returned by each of its numbered exports.
+let passed = true; // cleared by check() on any mismatch; gates the final "Passed" line
+function check(expected, funName, ...args) // funName is an expression string resolved with eval, e.g. "exports.func_i32x4_0"
+{
+    let fun = eval(funName);
+    let result;
+    try {
+        result = fun(...args);
+    } catch (e) {
+        result = e.message;
+    }
+    // A thrown error's message is compared exactly like a returned value; the comparison is loose (!=).
+    if(result != expected) {
+        passed = false;
+        print(`${funName}(${[...args]}) produced ${result}, expected ${expected}`);
+    }
+}
+// ffi is the (empty) import object handed to every instance; mod/exports are reassigned by each Run* function.
+let ffi = {};
+let mod, exports;
+// i32x4.wasm: exports func_i32x4_0..3.
+function RunInt32x4Tests() {
+
+    mod = new WebAssembly.Module(readbuffer('i32x4.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(2147483647, "exports.func_i32x4_0");
+    check(0, "exports.func_i32x4_1");
+    check(-2147483648, "exports.func_i32x4_2");
+    check(-100, "exports.func_i32x4_3");
+
+}
+function RunInt16x8UnsignedTests() {
+    // i16x8_u.wasm: exports func_i16x8_u_0..7; expected values lie in the unsigned 16-bit range.
+    mod = new WebAssembly.Module(readbuffer('i16x8_u.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(1, "exports.func_i16x8_u_0");
+    check(2, "exports.func_i16x8_u_1");
+    check(32768, "exports.func_i16x8_u_2");
+    check(0, "exports.func_i16x8_u_3");
+    check(32767, "exports.func_i16x8_u_4");
+    check(6, "exports.func_i16x8_u_5");
+    check(65535, "exports.func_i16x8_u_6");
+    check(65526, "exports.func_i16x8_u_7");
+}
+// i16x8_s.wasm: exports func_i16x8_s_0..7; expected values lie in the signed 16-bit range.
+function RunInt16x8SignedTests() {
+
+    mod = new WebAssembly.Module(readbuffer('i16x8_s.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(1, "exports.func_i16x8_s_0");
+    check(2, "exports.func_i16x8_s_1");
+    check(-32768, "exports.func_i16x8_s_2");
+    check(0, "exports.func_i16x8_s_3");
+    check(32767, "exports.func_i16x8_s_4");
+    check(6, "exports.func_i16x8_s_5");
+    check(7, "exports.func_i16x8_s_6");
+    check(-10, "exports.func_i16x8_s_7");
+}
+// i8x16_u.wasm: exports func_i8x16_u_0..15; expected values lie in the unsigned 8-bit range.
+function RunInt8x16UnsignedTests() {
+
+    mod = new WebAssembly.Module(readbuffer('i8x16_u.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(1, "exports.func_i8x16_u_0");
+    check(2, "exports.func_i8x16_u_1");
+    check(128, "exports.func_i8x16_u_2");
+    check(4, "exports.func_i8x16_u_3");
+    check(127, "exports.func_i8x16_u_4");
+    check(6, "exports.func_i8x16_u_5");
+    check(7, "exports.func_i8x16_u_6");
+    check(8, "exports.func_i8x16_u_7");
+    check(9, "exports.func_i8x16_u_8");
+    check(255, "exports.func_i8x16_u_9");
+    check(11, "exports.func_i8x16_u_10");
+    check(12, "exports.func_i8x16_u_11");
+    check(13, "exports.func_i8x16_u_12");
+    check(14, "exports.func_i8x16_u_13");
+    check(15, "exports.func_i8x16_u_14");
+    check(236, "exports.func_i8x16_u_15");
+}
+// i8x16_s.wasm: exports func_i8x16_s_0..15; expected values lie in the signed 8-bit range.
+function RunInt8x16SignedTests() {
+
+    mod = new WebAssembly.Module(readbuffer('i8x16_s.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(1, "exports.func_i8x16_s_0");
+    check(2, "exports.func_i8x16_s_1");
+    check(-128, "exports.func_i8x16_s_2");
+    check(4, "exports.func_i8x16_s_3");
+    check(127, "exports.func_i8x16_s_4");
+    check(6, "exports.func_i8x16_s_5");
+    check(7, "exports.func_i8x16_s_6");
+    check(8, "exports.func_i8x16_s_7");
+    check(9, "exports.func_i8x16_s_8");
+    check(10, "exports.func_i8x16_s_9");
+    check(11, "exports.func_i8x16_s_10");
+    check(12, "exports.func_i8x16_s_11");
+    check(13, "exports.func_i8x16_s_12");
+    check(14, "exports.func_i8x16_s_13");
+    check(15, "exports.func_i8x16_s_14");
+    check(-20, "exports.func_i8x16_s_15");
+}
+// f32x4.wasm: f32x4.wast builds lanes 1.0, 2147483647, 2147483646.9, -2147483647.5; expectations are those values rounded to f32.
+function RunFloat32x4Tests() {
+
+    mod = new WebAssembly.Module(readbuffer('f32x4.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(1.0, "exports.func_f32x4_0");
+    check(2147483648, "exports.func_f32x4_1");
+    check(2147483648, "exports.func_f32x4_2");
+    check(-2147483648, "exports.func_f32x4_3");
+}
+// b32x4.wasm: NOTE(review): the exports presumably return 0/1, which check()'s loose != matches against false/true - confirm.
+function RunBool32x4Tests() {
+    mod = new WebAssembly.Module(readbuffer('b32x4.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(false, "exports.func_b32x4_0");
+    check(true, "exports.func_b32x4_1");
+    check(true, "exports.func_b32x4_2");
+    check(true, "exports.func_b32x4_3");
+}
+// b16x8.wasm: exports func_b16x8_0..7.
+function RunBool16x8Tests() {
+    mod = new WebAssembly.Module(readbuffer('b16x8.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(true, "exports.func_b16x8_0");
+    check(false, "exports.func_b16x8_1");
+    check(true, "exports.func_b16x8_2");
+    check(true, "exports.func_b16x8_3");
+    check(true, "exports.func_b16x8_4");
+    check(true, "exports.func_b16x8_5");
+    check(true, "exports.func_b16x8_6");
+    check(true, "exports.func_b16x8_7");
+}
+// b8x16.wasm: exports func_b8x16_0..15.
+function RunBool8x16Tests() {
+    mod = new WebAssembly.Module(readbuffer('b8x16.wasm'));
+    exports = new WebAssembly.Instance(mod, ffi).exports;
+
+    check(true, "exports.func_b8x16_0");
+    check(false, "exports.func_b8x16_1");
+    check(true, "exports.func_b8x16_2");
+    check(true, "exports.func_b8x16_3");
+    check(true, "exports.func_b8x16_4");
+    check(true, "exports.func_b8x16_5");
+    check(true, "exports.func_b8x16_6");
+    check(true, "exports.func_b8x16_7");
+    check(true, "exports.func_b8x16_8");
+    check(true, "exports.func_b8x16_9");
+    check(true, "exports.func_b8x16_10");
+    check(true, "exports.func_b8x16_11");
+    check(true, "exports.func_b8x16_12");
+    check(true, "exports.func_b8x16_13");
+    check(true, "exports.func_b8x16_14");
+    check(true, "exports.func_b8x16_15");
+}
+
+//Tests
+RunInt32x4Tests();
+RunInt16x8UnsignedTests();
+RunInt16x8SignedTests();
+RunInt8x16UnsignedTests();
+RunInt8x16SignedTests();
+RunFloat32x4Tests();
+RunBool32x4Tests();
+RunBool16x8Tests();
+RunBool8x16Tests();
+
+if(passed) {
+    print("Passed");
+}
diff --git a/test/wasm.simd/comp.wasm b/test/wasm.simd/comp.wasm
new file mode 100755
index 00000000000..73953edbe6f
Binary files /dev/null and
b/test/wasm.simd/comp.wasm differ diff --git a/test/wasm.simd/comp.wast b/test/wasm.simd/comp.wast new file mode 100755 index 00000000000..0a2f2e69790 --- /dev/null +++ b/test/wasm.simd/comp.wast @@ -0,0 +1,559 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. +;;------------------------------------------------------------------------------------------------------- + +(module + (import "dummy" "memory" (memory 1)) + + + (func (export "func_i32x4_compare_s") (param $2 i32) + (block $label$0 + (block $label$1 + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_if $label$5 + (i32.gt_u + (get_local $2) + (i32.const 5) + ) + ) + (block $label$6 + (br_table $label$6 $label$4 $label$3 $label$2 $label$1 $label$0 $label$6 + (get_local $2) + ) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.eq + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (unreachable) ;; invalid operation + ) + (m128.store offset=0 (i32.const 0) + (i32x4.ne + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.lt_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.le_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.gt_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.ge_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load 
offset=16 align=4 (i32.const 0)) + ) + ) + ) + + + (func (export "func_i32x4_compare_u") (param $2 i32) + (block $label$0 + (block $label$1 + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_if $label$5 + (i32.gt_u + (get_local $2) + (i32.const 5) + ) + ) + (block $label$6 + (br_table $label$6 $label$4 $label$3 $label$2 $label$1 $label$0 $label$6 + (get_local $2) + ) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.eq + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (unreachable) ;; invalid operation + ) + (m128.store offset=0 (i32.const 0) + (i32x4.ne + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.lt_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.le_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.gt_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i32x4.ge_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + ) + + + (func (export "func_i16x8_compare_s") (param $2 i32) + (block $label$0 + (block $label$1 + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_if $label$5 + (i32.gt_u + (get_local $2) + (i32.const 5) + ) + ) + (block $label$6 + (br_table $label$6 $label$4 $label$3 $label$2 $label$1 $label$0 $label$6 + (get_local $2) + ) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.eq + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (unreachable) ;; invalid operation + ) + (m128.store offset=0 
(i32.const 0) + (i16x8.ne + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.lt_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.le_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.gt_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.ge_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + ) + + + (func (export "func_i16x8_compare_u") (param $2 i32) + (block $label$0 + (block $label$1 + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_if $label$5 + (i32.gt_u + (get_local $2) + (i32.const 5) + ) + ) + (block $label$6 + (br_table $label$6 $label$4 $label$3 $label$2 $label$1 $label$0 $label$6 + (get_local $2) + ) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.eq + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (unreachable) ;; invalid operation + ) + (m128.store offset=0 (i32.const 0) + (i16x8.ne + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.lt_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.le_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.gt_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + 
(return) + ) + (m128.store offset=0 (i32.const 0) + (i16x8.ge_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + ) + + + (func (export "func_i8x16_compare_s") (param $2 i32) + (block $label$0 + (block $label$1 + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_if $label$5 + (i32.gt_u + (get_local $2) + (i32.const 5) + ) + ) + (block $label$6 + (br_table $label$6 $label$4 $label$3 $label$2 $label$1 $label$0 $label$6 + (get_local $2) + ) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.eq + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (unreachable) ;; invalid operation + ) + (m128.store offset=0 (i32.const 0) + (i8x16.ne + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.lt_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.le_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.gt_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.ge_s + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + ) + + + (func (export "func_i8x16_compare_u") (param $2 i32) + (block $label$0 + (block $label$1 + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_if $label$5 + (i32.gt_u + (get_local $2) + (i32.const 5) + ) + ) + (block $label$6 + (br_table $label$6 $label$4 $label$3 $label$2 $label$1 $label$0 $label$6 + (get_local $2) + ) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.eq + (m128.load offset=0 align=4 (i32.const 0)) + 
(m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (unreachable) ;; invalid operation + ) + (m128.store offset=0 (i32.const 0) + (i8x16.ne + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.lt_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.le_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.gt_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (i8x16.ge_u + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + ) + + + (func (export "func_f32x4_compare") (param $2 i32) + (block $label$0 + (block $label$1 + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_if $label$5 + (i32.gt_u + (get_local $2) + (i32.const 5) + ) + ) + (block $label$6 + (br_table $label$6 $label$4 $label$3 $label$2 $label$1 $label$0 $label$6 + (get_local $2) + ) + ) + (m128.store offset=0 (i32.const 0) + (f32x4.eq + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (unreachable) ;; invalid operation + ) + (m128.store offset=0 (i32.const 0) + (f32x4.ne + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (f32x4.lt + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (f32x4.le + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 
(i32.const 0) + (f32x4.gt + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (f32x4.ge + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + ) + + (func (export "func_f64x2_compare") (param $2 i32) + (block $label$0 + (block $label$1 + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_if $label$5 + (i32.gt_u + (get_local $2) + (i32.const 5) + ) + ) + (block $label$6 + (br_table $label$6 $label$4 $label$3 $label$2 $label$1 $label$0 $label$6 + (get_local $2) + ) + ) + (m128.store offset=0 (i32.const 0) + (f64x2.eq + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (unreachable) ;; invalid operation + ) + (m128.store offset=0 (i32.const 0) + (f64x2.ne + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (f64x2.lt + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (f64x2.le + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (f64x2.gt + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + (return) + ) + (m128.store offset=0 (i32.const 0) + (f64x2.ge + (m128.load offset=0 align=4 (i32.const 0)) + (m128.load offset=16 align=4 (i32.const 0)) + ) + ) + ) +) diff --git a/test/wasm.simd/comparisonTests.js b/test/wasm.simd/comparisonTests.js new file mode 100755 index 00000000000..20f787b5ee5 --- /dev/null +++ b/test/wasm.simd/comparisonTests.js @@ -0,0 +1,756 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft 
Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+let passed = true;
+// Selector passed to each comp.wasm export; the values follow the br_table case order in comp.wast (0 = eq ... 5 = ge).
+const OPS = Object.freeze({
+    EQ : 0,
+    NE : 1,
+    LT : 2,
+    LE : 3,
+    GT : 4,
+    GE : 5
+});
+// Loose (!=) comparison, so 1/0 expectations match true/false; on mismatch clears `passed` and throws a string.
+function assertEquals(expected, actual) {
+    if (expected != actual) {
+        passed = false;
+        throw `Expected ${expected}, received ${actual}`;
+    }
+}
+// The module imports memObj as "dummy"."memory"; `arrays` maps a lane-type name to a typed view over that same buffer.
+const INITIAL_SIZE = 1;
+const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE});
+const module = new WebAssembly.Module(readbuffer('comp.wasm'));
+const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports;
+const arrays = {
+    "i32x4" : new Int32Array (memObj.buffer),
+    "i16x8" : new Int16Array (memObj.buffer),
+    "i8x16" : new Int8Array (memObj.buffer),
+    "f32x4" : new Float32Array (memObj.buffer),
+    "f64x2" : new Float64Array (memObj.buffer)
+};
+// Copies every element of `args` into `arr`, starting at element index `offset`.
+function moveArgsIntoArray(args, offset, arr) {
+    for (let i = 0; i < args.length; i++) {
+        arr[offset + i] = args[i];
+    }
+}
+// Writes both operands into memory, runs export `funcname` with selector `op`, then checks each result lane against resultArr.
+let testCompOps = function (funcname, args1, args2, op, resultArr) {
+    // The typed view is picked by the second '_'-separated field of the export name, e.g. "i32x4".
+    const len = args1.length;
+    const arr = arrays[funcname.split('_')[1]];
+    // args1 lands at element 0 and args2 at element `len`; comp.wast loads from byte offsets 0 and 16 and stores the result at 0.
+    moveArgsIntoArray(args1, 0, arr);
+    moveArgsIntoArray(args2, len, arr);
+    instance[funcname](op);
+    for (let i = 0; i < len; i++) {
+        assertEquals(resultArr[i], Number.isNaN(arr[i]) || !!arr[i]); // lane truthiness; presumably a set float lane reads back as NaN - confirm
+    }
+}
+// i32x4 signed comparisons: two vectors per operator.
+testCompOps("func_i32x4_compare_s",
+    [2147483647,0,-100,-2147483648],
+    [2147483647,45,202,-2147483648],
+    OPS.EQ,
+    [1,0,0,1]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [277,2147483647,-100,2147483647],
+    [431,2147483647,555,2147483647],
+    OPS.EQ,
+    [0,1,0,1]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [2147483647,0,-100,-2147483648],
+    [2147483647,45,202,-2147483648],
+    OPS.NE,
+    [0,1,1,0]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [277,2147483647,-100,2147483647],
+    [431,2147483647,555,2147483647],
+    OPS.NE,
+    [1,0,1,0]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [-1,1440,2100,-2147483648],
+    [2147483647,45,202,4],
+    OPS.LT,
+    [1,0,0,1]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [431,-2147483646,555,21474836],
+    [277,2147483647,-100,2147483647],
+    OPS.LT,
+    [0,1,0,1]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [2147483647,1440,2100,4],
+    [2147483647,45,202,4],
+    OPS.LE,
+    [1,0,0,1]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [431,-2147483646,555,21474835],
+    [277,-2147483646,-100,21474836],
+    OPS.LE,
+    [0,1,0,1]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [-1,1440,2100,-2147483648],
+    [2147483647,45,202,4],
+    OPS.GT,
+    [0,1,1,0]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [431,-2147483646,555,21474836],
+    [277,2147483647,-100,2147483647],
+    OPS.GT,
+    [1,0,1,0]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [-1,1440,2100,-2147483648],
+    [2147483647,1440,202,4],
+    OPS.GE,
+    [0,1,1,0]
+);
+
+testCompOps("func_i32x4_compare_s",
+    [431,-2147483646,-100,21474836],
+    [277,2147483647,-100,2147483647],
+    OPS.GE,
+    [1,0,1,0]
+);
+// i32x4 unsigned comparisons: negative inputs are stored through the Int32Array view and compared by the *_u operators.
+testCompOps("func_i32x4_compare_u",
+    [2147483647,0,-100,-2147483648],
+    [2147483647,45,202,-2147483648],
+    OPS.EQ,
+    [1,0,0,1]
+);
+
+testCompOps("func_i32x4_compare_u",
+    [277,2147483647,-100,2147483647],
+    [431,2147483647,555,2147483647],
+    OPS.EQ,
+    [0,1,0,1]
+);
+
+testCompOps("func_i32x4_compare_u",
+    [2147483647,0,-100,-2147483648],
+    [2147483647,45,202,-2147483648],
+    OPS.NE,
+    [0,1,1,0]
+);
+
+testCompOps("func_i32x4_compare_u",
+    [277,2147483647,-100,2147483647],
+    [431,2147483647,555,2147483647],
+    OPS.NE,
+    [1,0,1,0]
+);
+
+testCompOps("func_i32x4_compare_u",
+    [-100,1440,2100,-2147483648],
+    [-1,45,202,-2147483647],
+    OPS.LT,
+    [1,0,0,1]
+);
+
+testCompOps("func_i32x4_compare_u",
+    [431,2147483646,555,21474836],
+    [277,-2147483647,100,2147483647],
+    OPS.LT,
+    [0,1,0,1]
+);
+
+testCompOps("func_i32x4_compare_u",
+    [2147483647,1440,2100,4],
+    [2147483647,45,202,4],
+    OPS.LE,
+    [1,0,0,1]
+);
+
+testCompOps("func_i32x4_compare_u", + [431,-2147483646,-555,21474835], + [277,-2147483646,100,21474836], + OPS.LE, + [0,1,0,1] +); + +testCompOps("func_i32x4_compare_u", + [2147483647,1440,2100,2147483647], + [-1,45,202,-4], + OPS.GT, + [0,1,1,0] +); + +testCompOps("func_i32x4_compare_u", + [431,2147483646,555,21474836], + [277,-2147483647,100,2147483647], + OPS.GT, + [1,0,1,0] +); + +testCompOps("func_i32x4_compare_u", + [1,1440,2100,2147483648], + [-2147483647,1440,202,-4], + OPS.GE, + [0,1,1,0] +); + +testCompOps("func_i32x4_compare_u", + [431,2147483646,-100,21474836], + [277,-2147483647,-100,2147483647], + OPS.GE, + [1,0,1,0] +); + +testCompOps("func_i16x8_compare_s", + [65535,0,-100,-777,4,2,207,-65536], + [65535,0,-123,-777,3,1,202,-65536], + OPS.EQ, + [1,1,0,1,0,0,0,1] +); + +testCompOps("func_i16x8_compare_s", + [11,2147483647,-200,2147483647,431,2147483647,555,2147483647], + [277,2147483647,-100,2147483647,432,2147483647,755,2147483647], + OPS.EQ, + [0,1,0,1,0,1,0,1] +); + +testCompOps("func_i16x8_compare_s", + [65535,0,-100,-777,4,2,207,-65536], + [65535,0,-123,-777,3,1,202,-65536], + OPS.NE, + [0,0,1,0,1,1,1,0] +); + +testCompOps("func_i16x8_compare_s", + [11,2147483647,-200,2147483647,431,2147483647,555,2147483647], + [277,2147483647,-100,2147483647,432,2147483647,755,2147483647], + OPS.NE, + [1,0,1,0,1,0,1,0] +); + +testCompOps("func_i16x8_compare_s", + [32766,-1,-100,-778,4,2,207,-32768], + [32767,0,-123,-777,3,1,202,-32767], + OPS.LT, + [1,1,0,1,0,0,0,1] +); + +testCompOps("func_i16x8_compare_s", + [1111,32766,-99,32765,1001,1,855,-32768], + [277,32767,-100,32766,432,2,755,-32767], + OPS.LT, + [0,1,0,1,0,1,0,1] +); + +testCompOps("func_i16x8_compare_s", + [32767,-1,-100,-778,4,2,207,-32768], + [32767,-1,-123,-777,3,1,202,-32767], + OPS.LE, + [1,1,0,1,0,0,0,1] +); + +testCompOps("func_i16x8_compare_s", + [1111,32766,-99,32765,1001,1,855,-32768], + [277,32767,-100,32766,432,2,855,-32767], + OPS.LE, + [0,1,0,1,0,1,1,1] +); + 
+testCompOps("func_i16x8_compare_s",
+    [32766,-1,-100,-778,4,2,207,-32768],
+    [32767,0,-123,-777,3,1,202,-32767],
+    OPS.GT,
+    [0,0,1,0,1,1,1,0]
+);
+
+testCompOps("func_i16x8_compare_s",
+    [1111,32766,-99,32765,1001,1,855,-32768],
+    [277,32767,-100,32766,432,2,755,-32767],
+    OPS.GT,
+    [1,0,1,0,1,0,1,0]
+);
+
+testCompOps("func_i16x8_compare_s",
+    [32767,-1,-100,-778,4,2,207,-32768],
+    [32767,-1,-123,-777,3,1,202,-32767],
+    OPS.GE,
+    [1,1,1,0,1,1,1,0]
+);
+
+testCompOps("func_i16x8_compare_s",
+    [1111,32766,-99,32765,1001,1,855,-32768],
+    [277,32767,-100,32766,432,2,855,-32767],
+    OPS.GE,
+    [1,0,1,0,1,0,1,0]
+);
+// i16x8 unsigned comparisons: inputs above 32767 are written through an Int16Array view, which keeps their low 16 bits.
+testCompOps("func_i16x8_compare_u",
+    [65534,32766,129,0,4,2,207,65530],
+    [65535,32767,123,1,3,1,202,65531],
+    OPS.LT,
+    [1,1,0,1,0,0,0,1]
+);
+
+testCompOps("func_i16x8_compare_u",
+    [1111,32766,65535,32765,1001,1,855,32767],
+    [277,32767,65531,32766,432,2,755,32768],
+    OPS.LT,
+    [0,1,0,1,0,1,0,1]
+);
+
+testCompOps("func_i16x8_compare_u",
+    [65535,32766,129,0,4,2,207,65530],
+    [65535,32767,123,1,3,1,202,65531],
+    OPS.LE,
+    [1,1,0,1,0,0,0,1]
+);
+
+testCompOps("func_i16x8_compare_u",
+    [1111,32766,65535,32765,1001,1,855,32767],
+    [277,32767,65531,32766,432,2,855,32768],
+    OPS.LE,
+    [0,1,0,1,0,1,1,1]
+);
+
+testCompOps("func_i16x8_compare_u",
+    [65534,32766,129,0,4,2,207,65530],
+    [65535,32767,123,1,3,1,202,65531],
+    OPS.GT,
+    [0,0,1,0,1,1,1,0]
+);
+
+testCompOps("func_i16x8_compare_u",
+    [1111,32766,65535,32765,1001,1,855,32767],
+    [277,32767,65531,32766,432,2,755,32768],
+    OPS.GT,
+    [1,0,1,0,1,0,1,0]
+);
+
+testCompOps("func_i16x8_compare_u",
+    [65535,32767,129,0,4,2,207,65530],
+    [65535,32766,123,1,3,1,202,65531],
+    OPS.GE,
+    [1,1,1,0,1,1,1,0]
+);
+
+testCompOps("func_i16x8_compare_u",
+    [1111,32766,65535,32765,1001,1,855,32767],
+    [277,32767,65531,32766,432,2,855,32768],
+    OPS.GE,
+    [1,0,1,0,1,0,1,0]
+);
+// NOTE(review): this call still targets an i16x8 export but its operands fit in 8 bits; possibly meant for the i8x16 exports - confirm.
+testCompOps("func_i16x8_compare_s",
+    [255,0,-100,-77,4,2,207,-1],
+    [255,0,-123,-77,3,1,202,-1],
+    OPS.EQ,
+    [1,1,0,1,0,0,0,1]
+);
+
+testCompOps("func_i16x8_compare_s", + [11,127,-111,127,43,127,55,127], + [12,127,-100,127,42,127,75,127], + OPS.EQ, + [0,1,0,1,0,1,0,1] +); + +testCompOps("func_i16x8_compare_s", + [255,0,-100,-77,4,2,207,-1], + [255,0,-123,-77,3,1,202,-1], + OPS.NE, + [0,0,1,0,1,1,1,0] +); + +testCompOps("func_i16x8_compare_s", + [11,127,-111,127,431,127,555,127], + [12,127,-100,127,432,127,755,127], + OPS.NE, + [1,0,1,0,1,0,1,0] +); + +testCompOps("func_i16x8_compare_s", + [126,-1,-100,-78,4,2,27,-128], + [127,0,-123,-77,3,1,22,-127], + OPS.LT, + [1,1,0,1,0,0,0,1] +); + +testCompOps("func_i16x8_compare_s", + [111,126,-99,64,101,1,85,-128], + [12,127,-100,126,43,2,75,-127], + OPS.LT, + [0,1,0,1,0,1,0,1] +); + +testCompOps("func_i16x8_compare_s", + [127,-1,-100,-78,4,2,27,-128], + [127,-1,-123,-77,3,1,22,-127], + OPS.LE, + [1,1,0,1,0,0,0,1] +); + +testCompOps("func_i16x8_compare_s", + [111,126,-99,64,101,1,85,-128], + [12,127,-100,126,43,2,85,-127], + OPS.LE, + [0,1,0,1,0,1,1,1] +); + +testCompOps("func_i16x8_compare_s", + [126,-1,-100,-78,4,2,27,-128], + [127,0,-123,-77,3,1,22,-127], + OPS.GT, + [0,0,1,0,1,1,1,0] +); + +testCompOps("func_i16x8_compare_s", + [111,126,-99,64,101,1,85,-128], + [12,127,-100,126,43,2,75,-127], + OPS.GT, + [1,0,1,0,1,0,1,0] +); + +testCompOps("func_i16x8_compare_s", + [127,-1,-100,-78,4,2,27,-128], + [127,-1,-123,-77,3,1,22,-127], + OPS.GE, + [1,1,1,0,1,1,1,0] +); + +testCompOps("func_i16x8_compare_s", + [111,126,-99,64,101,1,85,-128], + [12,127,-100,126,42,2,85,-127], + OPS.GE, + [1,0,1,0,1,0,1,0] +); + +testCompOps("func_i16x8_compare_u", + [253,126,129,0,4,2,207,63], + [255,127,123,1,3,1,202,65], + OPS.LT, + [1,1,0,1,0,0,0,1] +); + +testCompOps("func_i16x8_compare_u", + [111,126,255,64,101,1,85,127], + [12,127,65,126,43,2,75,128], + OPS.LT, + [0,1,0,1,0,1,0,1] +); + +testCompOps("func_i16x8_compare_u", + [255,126,129,0,4,2,207,63], + [255,127,123,1,3,1,202,65], + OPS.LE, + [1,1,0,1,0,0,0,1] +); + +testCompOps("func_i16x8_compare_u", + 
[111,126,255,64,101,1,85,127], + [12,127,65,126,43,2,85,128], + OPS.LE, + [0,1,0,1,0,1,1,1] +); + +testCompOps("func_i16x8_compare_u", + [253,126,129,0,4,2,207,63], + [255,127,123,1,3,1,202,65], + OPS.GT, + [0,0,1,0,1,1,1,0] +); + +testCompOps("func_i16x8_compare_u", + [111,126,255,64,101,1,85,127], + [12,127,200,126,43,2,75,128], + OPS.GT, + [1,0,1,0,1,0,1,0] +); + +testCompOps("func_i16x8_compare_u", + [255,127,129,0,4,2,207,63], + [255,126,123,1,3,1,202,65], + OPS.GE, + [1,1,1,0,1,1,1,0] +); + +testCompOps("func_i16x8_compare_u", + [111,126,255,64,101,1,85,127], + [12,127,200,126,43,2,85,128], + OPS.GE, + [1,0,1,0,1,0,1,0] +); + +testCompOps("func_f32x4_compare", + [1234.5,0,NaN,-567.25], + [1234.5,45,202,-567.25], + OPS.EQ, + [1,0,0,1] +); + +testCompOps("func_f32x4_compare", + [277,1234.5,-100,1234.5], + [431,1234.5,NaN,1234.5], + OPS.EQ, + [0,1,0,1] +); + +testCompOps("func_f32x4_compare", + [1234.5,0,-100,-567.25], + [1234.5,45,NaN,-567.25], + OPS.NE, + [0,1,1,0] +); + +testCompOps("func_f32x4_compare", + [277,1234.5,-100,1234.5], + [431,1234.5,NaN,1234.5], + OPS.NE, + [1,0,1,0] +); + +testCompOps("func_f32x4_compare", + [-1,1440,2100,-567.25], + [1234.5,45,202,4], + OPS.LT, + [1,0,0,1] +); + +testCompOps("func_f32x4_compare", + [431,-2147483646,555,1234.1], + [277,1234.5,-100,1234.5], + OPS.LT, + [0,1,0,1] +); + +testCompOps("func_f32x4_compare", + [1234.5,1440,2100,4], + [1234.5,45,202,4], + OPS.LE, + [1,0,0,1] +); + +testCompOps("func_f32x4_compare", + [431,-2147483646,555,21474835], + [277,-2147483646,-100,21474836], + OPS.LE, + [0,1,0,1] +); + +testCompOps("func_f32x4_compare", + [-1,1440,2100,-567.25], + [1234.5,45,202,4], + OPS.GT, + [0,1,1,0] +); + +testCompOps("func_f32x4_compare", + [431,-2147483646,555,1234.4], + [277,1234.5,-100,1234.5], + OPS.GT, + [1,0,1,0] +); + +testCompOps("func_f32x4_compare", + [-1,1440,2100,-567.25], + [1234.5,1440,202,4], + OPS.GE, + [0,1,1,0] +); + +testCompOps("func_f32x4_compare", + [431,-2147483646,-100,1234.4], + 
[277,1234.5,-100,1234.5], + OPS.GE, + [1,0,1,0] +); + +testCompOps("func_f64x2_compare", + [1234.5,0], + [1234.5,45], + OPS.EQ, + [1,0] +); + +testCompOps("func_f64x2_compare", + [NaN,-567.25], + [202,-567.25], + OPS.EQ, + [0,1] +); + +testCompOps("func_f64x2_compare", + [277,1234.5], + [431,1234.5], + OPS.EQ, + [0,1] +); + +testCompOps("func_f64x2_compare", + [-100,1234.5], + [NaN,1234.5], + OPS.EQ, + [0,1] +); + +testCompOps("func_f64x2_compare", + [1234.5,0], + [1234.5,45], + OPS.NE, + [0,1] +); + +testCompOps("func_f64x2_compare", + [-100,-567.25], + [NaN,-567.25], + OPS.NE, + [1,0] +); + +testCompOps("func_f64x2_compare", + [277,1234.5], + [431,1234.5], + OPS.NE, + [1,0] +); + +testCompOps("func_f64x2_compare", + [-100,1234.5], + [NaN,1234.5], + OPS.NE, + [1,0] +); + +testCompOps("func_f64x2_compare", + [-1,1440], + [1234.5,45], + OPS.LT, + [1,0] +); + +testCompOps("func_f64x2_compare", + [2100,-567.25], + [202,4], + OPS.LT, + [0,1] +); + +testCompOps("func_f64x2_compare", + [431,-2147483646], + [277,1234.5], + OPS.LT, + [0,1] +); + +testCompOps("func_f64x2_compare", + [555,1234.1], + [-100,1234.5], + OPS.LT, + [0,1] +); + +testCompOps("func_f64x2_compare", + [1234.5,1440], + [1234.5,45], + OPS.LE, + [1,0] +); + +testCompOps("func_f64x2_compare", + [2100,4], + [202,4], + OPS.LE, + [0,1] +); + +testCompOps("func_f64x2_compare", + [431,-2147483646], + [277,-2147483646], + OPS.LE, + [0,1] +); + +testCompOps("func_f64x2_compare", + [555,21474835], + [-100,21474836], + OPS.LE, + [0,1] +); + +testCompOps("func_f64x2_compare", + [-1,1440], + [1234.5,45], + OPS.GT, + [0,1] +); + +testCompOps("func_f64x2_compare", + [2100,-567.25], + [202,4], + OPS.GT, + [1,0] +); + +testCompOps("func_f64x2_compare", + [431,-2147483646], + [277,1234.5], + OPS.GT, + [1,0] +); + +testCompOps("func_f64x2_compare", + [555,1234.4], + [-100,1234.5], + OPS.GT, + [1,0] +); + +testCompOps("func_f64x2_compare", + [-1,1440], + [1234.5,1440], + OPS.GE, + [0,1] +); + 
+testCompOps("func_f64x2_compare", + [2100,-567.25], + [202,4], + OPS.GE, + [1,0] +); + +testCompOps("func_f64x2_compare", + [-100,1234.4], + [-100,1234.5], + OPS.GE, + [1,0] +); + +testCompOps("func_f64x2_compare", + [431,-2147483646], + [277,1234.5], + OPS.GE, + [1,0] +); + +if (passed) { + print("Passed"); +} diff --git a/test/wasm.simd/const.wasm b/test/wasm.simd/const.wasm new file mode 100755 index 00000000000..51979d0d9f0 Binary files /dev/null and b/test/wasm.simd/const.wasm differ diff --git a/test/wasm.simd/const.wast b/test/wasm.simd/const.wast new file mode 100644 index 00000000000..1807d2e5aed --- /dev/null +++ b/test/wasm.simd/const.wast @@ -0,0 +1,104 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. +;;------------------------------------------------------------------------------------------------------- + +(module + (import "dummy" "memory" (memory 1)) + + (func (export "m128_const_1") (local $v1 m128) + (set_local $v1 + (m128.const + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0xcc) + (i32.const 0xab) + (i32.const 0x0) + (i32.const 0xff) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + ) + ) + (m128.store offset=0 (i32.const 0) (get_local $v1)) + ) + + (func (export "m128_const_2") (local $v1 m128) + (set_local $v1 + (m128.const + (i32.const 0x0) + (i32.const 0xbc) + (i32.const 0x0) + (i32.const 0xa1) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0x0) + (i32.const 0xff) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x1) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + ) + ) + (m128.store offset=0 (i32.const 0) (get_local 
$v1)) + ) + + (func (export "m128_const_3") (local $v1 m128) + (set_local $v1 + (m128.const + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + (i32.const 0xff) + ) + ) + (m128.store offset=0 (i32.const 0) (get_local $v1)) + ) + + (func (export "m128_const_4") (local $v1 m128) + (set_local $v1 + (m128.const + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + (i32.const 0x0) + ) + ) + (m128.store offset=0 (i32.const 0) (get_local $v1)) + ) +) diff --git a/test/wasm.simd/constTests.js b/test/wasm.simd/constTests.js new file mode 100755 index 00000000000..9bdca3026d7 --- /dev/null +++ b/test/wasm.simd/constTests.js @@ -0,0 +1,37 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//-------------------------------------------------------------------------------------------------------
+// Checks that each m128_const_N export of const.wasm stores the expected 128-bit constant at byte offset 0 of memory.
+let passed = true;
+// Loose (!=) comparison; on mismatch clears `passed` and throws a string naming the expected and received values.
+function assertEquals(expected, actual) {
+    if (expected != actual) {
+        passed = false;
+        throw `Expected ${expected}, received ${actual}`;
+    }
+}
+// The module imports memObj as "dummy"."memory"; `arr` views the same buffer as unsigned 32-bit lanes.
+const INITIAL_SIZE = 1;
+const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE});
+const arr = new Uint32Array (memObj.buffer);
+
+const module = new WebAssembly.Module(readbuffer('const.wasm'));
+const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports;
+// Runs export `funcname` (no arguments), then compares the first four 32-bit lanes of memory with `resultArr` (the expected lanes).
+let testIntLogicalOps = function (funcname, resultArr) {
+    const len = 4;
+    instance[funcname]();
+    for (let i = 0; i < len; i++) {
+        assertEquals(resultArr[i], arr[i]); // expected value first, so a failure message labels the two values correctly
+    }
+}
+// Each expected lane is four consecutive m128.const bytes read little-endian, e.g. cc ab 00 ff -> 0xFF00ABCC.
+testIntLogicalOps("m128_const_1", [0, 0xFF00ABCC, 0, 0]);
+testIntLogicalOps("m128_const_2", [0xA100BC00, 0xFFFFFFFF, 0xFF00, 0x1]);
+testIntLogicalOps("m128_const_3", [0xFFFFFFFF, 0xFFFFFFFF, 0, 0xFFFFFFFF]);
+testIntLogicalOps("m128_const_4", [0, 0, 0, 0]);
+
+if (passed) {
+    print("Passed");
+}
diff --git a/test/wasm.simd/f32x4.wasm b/test/wasm.simd/f32x4.wasm
new file mode 100755
index 00000000000..c2a084980cf
Binary files /dev/null and b/test/wasm.simd/f32x4.wasm differ
diff --git a/test/wasm.simd/f32x4.wast b/test/wasm.simd/f32x4.wast
new file mode 100755
index 00000000000..576e85abdbe
--- /dev/null
+++ b/test/wasm.simd/f32x4.wast
@@ -0,0 +1,79 @@
+;;-------------------------------------------------------------------------------------------------------
+;; Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_f32x4_0") + (result f32) + (local f32x4) + (set_local + 0 + (f32x4.build + (f32.const 1.0) + (f32.const 2147483647) + (f32.const 2147483646.9) + (f32.const -2147483647.5) + ) + ) + (f32x4.extractLane + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_f32x4_1") + (result f32) + (local f32x4) + (set_local + 0 + (f32x4.build + (f32.const 1.0) + (f32.const 2147483647) + (f32.const 2147483646.9) + (f32.const -2147483647.5) + ) + ) + (f32x4.extractLane + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_f32x4_2") + (result f32) + (local f32x4) + (set_local + 0 + (f32x4.build + (f32.const 1.0) + (f32.const 2147483647) + (f32.const 2147483646.9) + (f32.const -2147483647.5) + ) + ) + (f32x4.extractLane + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_f32x4_3") + (result f32) + (local f32x4) + (set_local + 0 + (f32x4.build + (f32.const 1.0) + (f32.const 2147483647) + (f32.const 2147483646.9) + (f32.const -2147483647.5) + ) + ) + (f32x4.extractLane + (get_local 0) + (i32.const 3) + ) + ) +) diff --git a/test/wasm.simd/i16x8_s.wasm b/test/wasm.simd/i16x8_s.wasm new file mode 100755 index 00000000000..d42ad7ceb84 Binary files /dev/null and b/test/wasm.simd/i16x8_s.wasm differ diff --git a/test/wasm.simd/i16x8_s.wast b/test/wasm.simd/i16x8_s.wast new file mode 100755 index 00000000000..231555a76cc --- /dev/null +++ b/test/wasm.simd/i16x8_s.wast @@ -0,0 +1,183 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_i16x8_s_0") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (i16x8.extractLane_s + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_i16x8_s_1") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (i16x8.extractLane_s + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_i16x8_s_2") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (i16x8.extractLane_s + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_i16x8_s_3") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (i16x8.extractLane_s + (get_local 0) + (i32.const 3) + ) + ) + (func + (export "func_i16x8_s_4") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (i16x8.extractLane_s + (get_local 0) + (i32.const 4) + ) + ) + (func + (export "func_i16x8_s_5") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (i16x8.extractLane_s + (get_local 0) + (i32.const 5) + ) + ) + (func + (export 
"func_i16x8_s_6") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (i16x8.extractLane_s + (get_local 0) + (i32.const 6) + ) + ) + (func + (export "func_i16x8_s_7") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 7) + (i32.const -10) + ) + ) + (i16x8.extractLane_s + (get_local 0) + (i32.const 7) + ) + ) +) diff --git a/test/wasm.simd/i16x8_u.wasm b/test/wasm.simd/i16x8_u.wasm new file mode 100755 index 00000000000..c7b758db5fa Binary files /dev/null and b/test/wasm.simd/i16x8_u.wasm differ diff --git a/test/wasm.simd/i16x8_u.wast b/test/wasm.simd/i16x8_u.wast new file mode 100755 index 00000000000..c27c9a2a0b5 --- /dev/null +++ b/test/wasm.simd/i16x8_u.wast @@ -0,0 +1,183 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_i16x8_u_0") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 65535) + (i32.const -10) + ) + ) + (i16x8.extractLane_u + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_i16x8_u_1") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 65535) + (i32.const -10) + ) + ) + (i16x8.extractLane_u + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_i16x8_u_2") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 65535) + (i32.const -10) + ) + ) + (i16x8.extractLane_u + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_i16x8_u_3") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 65535) + (i32.const -10) + ) + ) + (i16x8.extractLane_u + (get_local 0) + (i32.const 3) + ) + ) + (func + (export "func_i16x8_u_4") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 65535) + (i32.const -10) + ) + ) + (i16x8.extractLane_u + (get_local 0) + (i32.const 4) + ) + ) + (func + (export "func_i16x8_u_5") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 65535) + (i32.const -10) + ) + ) + (i16x8.extractLane_u + (get_local 0) + (i32.const 5) + ) + ) + 
(func + (export "func_i16x8_u_6") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 65535) + (i32.const -10) + ) + ) + (i16x8.extractLane_u + (get_local 0) + (i32.const 6) + ) + ) + (func + (export "func_i16x8_u_7") + (result i32) + (local i16x8) + (set_local + 0 + (i16x8.build + (i32.const 1) + (i32.const 2) + (i32.const -32768) + (i32.const 0) + (i32.const 32767) + (i32.const 6) + (i32.const 65535) + (i32.const -10) + ) + ) + (i16x8.extractLane_u + (get_local 0) + (i32.const 7) + ) + ) +) diff --git a/test/wasm.simd/i32x4.wasm b/test/wasm.simd/i32x4.wasm new file mode 100755 index 00000000000..f32435eaccb Binary files /dev/null and b/test/wasm.simd/i32x4.wasm differ diff --git a/test/wasm.simd/i32x4.wast b/test/wasm.simd/i32x4.wast new file mode 100755 index 00000000000..fa152cfa60f --- /dev/null +++ b/test/wasm.simd/i32x4.wast @@ -0,0 +1,79 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_i32x4_0") + (result i32) + (local i32x4) + (set_local + 0 + (i32x4.build + (i32.const 2147483647) + (i32.const 0) + (i32.const -2147483648) + (i32.const -100) + ) + ) + (i32x4.extractLane + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_i32x4_1") + (result i32) + (local i32x4) + (set_local + 0 + (i32x4.build + (i32.const 2147483647) + (i32.const 0) + (i32.const -2147483648) + (i32.const -100) + ) + ) + (i32x4.extractLane + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_i32x4_2") + (result i32) + (local i32x4) + (set_local + 0 + (i32x4.build + (i32.const 2147483647) + (i32.const 0) + (i32.const -2147483648) + (i32.const -100) + ) + ) + (i32x4.extractLane + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_i32x4_3") + (result i32) + (local i32x4) + (set_local + 0 + (i32x4.build + (i32.const 2147483647) + (i32.const 0) + (i32.const -2147483648) + (i32.const -100) + ) + ) + (i32x4.extractLane + (get_local 0) + (i32.const 3) + ) + ) +) diff --git a/test/wasm.simd/i8x16_s.wasm b/test/wasm.simd/i8x16_s.wasm new file mode 100755 index 00000000000..c3837c56e22 Binary files /dev/null and b/test/wasm.simd/i8x16_s.wasm differ diff --git a/test/wasm.simd/i8x16_s.wast b/test/wasm.simd/i8x16_s.wast new file mode 100755 index 00000000000..6986c233ea3 --- /dev/null +++ b/test/wasm.simd/i8x16_s.wast @@ -0,0 +1,487 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_i8x16_s_0") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_i8x16_s_1") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_i8x16_s_2") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_i8x16_s_3") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 3) + ) + ) + (func + (export "func_i8x16_s_4") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + 
(i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 4) + ) + ) + (func + (export "func_i8x16_s_5") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 5) + ) + ) + (func + (export "func_i8x16_s_6") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 6) + ) + ) + (func + (export "func_i8x16_s_7") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 7) + ) + ) + (func + (export "func_i8x16_s_8") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + 
(i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 8) + ) + ) + (func + (export "func_i8x16_s_9") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 9) + ) + ) + (func + (export "func_i8x16_s_10") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 10) + ) + ) + (func + (export "func_i8x16_s_11") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 11) + ) + ) + (func + (export "func_i8x16_s_12") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 12) + ) + ) + (func + (export "func_i8x16_s_13") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) 
+ (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 13) + ) + ) + (func + (export "func_i8x16_s_14") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 14) + ) + ) + (func + (export "func_i8x16_s_15") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_s + (get_local 0) + (i32.const 15) + ) + ) +) diff --git a/test/wasm.simd/i8x16_u.wasm b/test/wasm.simd/i8x16_u.wasm new file mode 100755 index 00000000000..13378fa4211 Binary files /dev/null and b/test/wasm.simd/i8x16_u.wasm differ diff --git a/test/wasm.simd/i8x16_u.wast b/test/wasm.simd/i8x16_u.wast new file mode 100755 index 00000000000..47df997820b --- /dev/null +++ b/test/wasm.simd/i8x16_u.wast @@ -0,0 +1,487 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (func + (export "func_i8x16_u_0") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 0) + ) + ) + (func + (export "func_i8x16_u_1") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 1) + ) + ) + (func + (export "func_i8x16_u_2") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 2) + ) + ) + (func + (export "func_i8x16_u_3") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 3) + ) + ) + (func + (export "func_i8x16_u_4") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + 
(i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 4) + ) + ) + (func + (export "func_i8x16_u_5") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 5) + ) + ) + (func + (export "func_i8x16_u_6") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 6) + ) + ) + (func + (export "func_i8x16_u_7") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 7) + ) + ) + (func + (export "func_i8x16_u_8") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + 
(i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 8) + ) + ) + (func + (export "func_i8x16_u_9") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 9) + ) + ) + (func + (export "func_i8x16_u_10") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 10) + ) + ) + (func + (export "func_i8x16_u_11") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 11) + ) + ) + (func + (export "func_i8x16_u_12") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 12) + ) + ) + (func + (export "func_i8x16_u_13") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const 
-128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 13) + ) + ) + (func + (export "func_i8x16_u_14") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 14) + ) + ) + (func + (export "func_i8x16_u_15") + (result i32) + (local i8x16) + (set_local + 0 + (i8x16.build + (i32.const 1) + (i32.const 2) + (i32.const -128) + (i32.const 4) + (i32.const 127) + (i32.const 6) + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 255) + (i32.const 11) + (i32.const 12) + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const -20) + ) + ) + (i8x16.extractLane_u + (get_local 0) + (i32.const 15) + ) + ) +) diff --git a/test/wasm.simd/int64x2.wasm b/test/wasm.simd/int64x2.wasm new file mode 100755 index 00000000000..7106021b470 Binary files /dev/null and b/test/wasm.simd/int64x2.wasm differ diff --git a/test/wasm.simd/int64x2.wast b/test/wasm.simd/int64x2.wast new file mode 100644 index 00000000000..840c44dd4e4 --- /dev/null +++ b/test/wasm.simd/int64x2.wast @@ -0,0 +1,86 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (import "dummy" "memory" (memory 1)) + + (func (export "i16x8_anytrue") (result i32) + (i64x2.any_true (m128.load offset=0 align=4 (i32.const 0))) + ) + + (func (export "i16x8_alltrue") (result i32) + (i64x2.all_true (m128.load offset=0 align=4 (i32.const 0))) + ) + + (func (export "func_i64x2_extractlane_0") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (i64.store offset=0 (i32.const 8) (i64x2.extract_lane lane=0 (get_local $v1))) + ) + + (func (export "func_i64x2_extractlane_1") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (i64.store offset=0 (i32.const 0) (i64x2.extract_lane lane=1 (get_local $v1))) + ) + + (func (export "func_i64x2_replacelane_0") (local $v1 m128) (local $val i64) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $val (i64.load offset=16 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i64x2.replace_lane lane=0 (get_local $v1) (get_local $val))) + ) + + (func (export "func_i64x2_replacelane_1") (local $v1 m128) (local $val i64) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $val (i64.load offset=16 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i64x2.replace_lane lane=1 (get_local $v1) (get_local $val))) + ) + + (func (export "func_i64x2_splat") (local $v1 i64) (local $v2 m128) + (set_local $v1 (i64.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (i64x2.splat (get_local $v1))) + (m128.store offset=0 (i32.const 0) (get_local $v2)) + ) + + (func (export "func_i64x2_add") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i64x2.add (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i64x2_sub") (local $v1 m128) (local $v2 
m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i64x2.sub (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i64x2_neg") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i64x2.neg (get_local $v1))) + ) + + (func (export "func_i64x2_anytrue") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (i32.store offset=0 (i32.const 0) (i64x2.any_true (get_local $v1))) + ) + + (func (export "func_i64x2_alltrue") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (i32.store offset=0 (i32.const 0) (i64x2.all_true (get_local $v1))) + ) + + (func (export "func_i64x2_shl") (param $shamt i32) (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i64x2.shl (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i64x2_shr_s") (param $shamt i32) (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i64x2.shr_s (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i64x2_shr_u") (param $shamt i32) (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i64x2.shr_u (get_local $v1) (get_local $shamt))) + ) +) diff --git a/test/wasm.simd/int64x2Tests.js b/test/wasm.simd/int64x2Tests.js new file mode 100755 index 00000000000..f407fdd3789 --- /dev/null +++ b/test/wasm.simd/int64x2Tests.js @@ -0,0 +1,64 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//------------------------------------------------------------------------------------------------------- + +let passed = true; + +function assertEquals(expected, actual) { + if (expected != actual) { + passed = false; + throw `Expected ${expected}, received ${actual}`; + } +} + +const INITIAL_SIZE = 1; +const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); +const array = new Int32Array (memObj.buffer); + +const int64Module = new WebAssembly.Module(readbuffer('int64x2.wasm')); +const int64Instance = new WebAssembly.Instance(int64Module, { "dummy" : { "memory" : memObj } }).exports; + +function moveArgsIntoArray(args, offset, arr) { // Copies args into arr starting at index offset. + for (let i = 0; i < args.length; i++) { + arr[offset + i] = args[i]; + } +} + +let testInt64MathOps = function (funcname, inputArr, funcArgs, resultArr) { // Writes inputArr as 32-bit words at the start of memory, calls the export with funcArgs, then checks the leading words against resultArr. + + moveArgsIntoArray(inputArr, 0, array); + int64Instance[funcname](...funcArgs); + + for (let i = 0; i < resultArr.length; i++) { + assertEquals(resultArr[i], array[i]); // expected first, actual second, so the failure message reads correctly + } + +} + +testInt64MathOps("func_i64x2_splat", [0xF0F0F0F0, 0xAABBAABB], [], [0xF0F0F0F0|0, 0xAABBAABB|0, 0xF0F0F0F0|0, 0xAABBAABB|0]); +testInt64MathOps("func_i64x2_splat", [0xBEEF0000, 0xCAFEFFFF], [], [0xBEEF0000|0, 0xCAFEFFFF|0, 0xBEEF0000|0, 0xCAFEFFFF|0]); +testInt64MathOps("func_i64x2_extractlane_0", [0xDEADDEAD, 0xABBAABBA, 0x0, 0x0], [], [0xDEADDEAD|0, 0xABBAABBA|0, 0xDEADDEAD|0, 0xABBAABBA|0]); +testInt64MathOps("func_i64x2_extractlane_1", [0x0, 0x0, 0xDEADDEAD, 0xABBAABBA], [], [0xDEADDEAD|0, 0xABBAABBA|0, 0xDEADDEAD|0, 0xABBAABBA|0]); +testInt64MathOps("func_i64x2_replacelane_0", [0xDEADDEAD, 0xABBAABBA, 0xBEEF0000, 0xCAFEFFFF, 0xCCCCCCCC, 0xDDDDDDDD], [], [0xCCCCCCCC|0, 0xDDDDDDDD|0, 0xBEEF0000|0, 0xCAFEFFFF|0]); +testInt64MathOps("func_i64x2_replacelane_1", [0xDEADDEAD, 0xABBAABBA, 0xBEEF0000, 0xCAFEFFFF, 0xCCCCCCCC, 0xDDDDDDDD], [], [0xDEADDEAD|0, 0xABBAABBA|0, 0xCCCCCCCC|0, 0xDDDDDDDD|0]); +testInt64MathOps("func_i64x2_add", [0x00000001, 0x000FF000, 0xC, 0xA, 0xFFFFFFFF, 0xFF00000E, 0x3, 
0x5], [], [0x0, 0xFF0FF00F|0, 0xF, 0xF]); +testInt64MathOps("func_i64x2_sub", [0x00000003, 0xD0CFB000, 0xC, 0xA, 0xFFFFFFFF, 0xD0BA0000, 0x3, 0x5], [], [0x00000004, 0x0015afff, 0x9, 0x5]); +testInt64MathOps("func_i64x2_neg", [0x0, 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF], [], [0x0, 0x80000000|0, 0x00000001, 0x80000000|0]); +testInt64MathOps("func_i64x2_shl", [0x00100001, 0x80000001, 0xFFFFFFFF, 0xFFFFFFFF], [3], [0x00800008|0, 0x00000008, 0xFFFFFFF8|0, 0xFFFFFFFF|0]); +testInt64MathOps("func_i64x2_shl", [0x00100001, 0x80000001, 0xFFFFFFFF, 0xFFFFFFFF], [67], [0x00800008|0, 0x00000008, 0xFFFFFFF8|0, 0xFFFFFFFF|0]); +testInt64MathOps("func_i64x2_shr_u", [0x00100001, 0x80000001, 0xFFFFFFFF, 0xFFFFFFFF], [3], [0x20020000|0, 0x10000000|0, 0xFFFFFFFF|0, 0x1FFFFFFF|0]); +testInt64MathOps("func_i64x2_shr_u", [0x00100001, 0x80000001, 0xFFFFFFFF, 0xFFFFFFFF], [67], [0x20020000|0, 0x10000000|0, 0xFFFFFFFF|0, 0x1FFFFFFF|0]); +testInt64MathOps("func_i64x2_shr_s", [0x00100001, 0x80000001, 0xFFFFFFFF, 0xFFFFFFFF], [3], [0x20020000|0, 0xf0000000|0, 0xFFFFFFFF|0, 0xFFFFFFFF|0]); +testInt64MathOps("func_i64x2_shr_s", [0x00100001, 0x80000001, 0xFFFFFFFF, 0xFFFFFFFF], [67], [0x20020000|0, 0xf0000000|0, 0xFFFFFFFF|0, 0xFFFFFFFF|0]); +testInt64MathOps("func_i64x2_anytrue", [0x0, 0x80000000, 0x0, 0x0], [], [0x1]); +testInt64MathOps("func_i64x2_anytrue", [0x0, 0x00000000, 0x0, 0x1], [], [0x1]); +testInt64MathOps("func_i64x2_anytrue", [0x0, 0x0, 0x0, 0x0], [], [0x0]); +testInt64MathOps("func_i64x2_alltrue", [0x1, 0x0, 0x0, 0x1], [], [0x1]); +testInt64MathOps("func_i64x2_alltrue", [0x0, 0x1, 0x1, 0x0], [], [0x1]); +testInt64MathOps("func_i64x2_alltrue", [0x0, 0x0, 0x1, 0x1], [], [0x0]); +testInt64MathOps("func_i64x2_alltrue", [0x1, 0x0, 0x0, 0x0], [], [0x0]); + +if (passed) { + print("Passed"); +} \ No newline at end of file diff --git a/test/wasm.simd/loadTests.js b/test/wasm.simd/loadTests.js new file mode 100755 index 00000000000..2f3831a6664 --- /dev/null +++ b/test/wasm.simd/loadTests.js @@ 
-0,0 +1,83 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. +//------------------------------------------------------------------------------------------------------- +let passed = true; + +function assertEquals(expected, actual) { + if (expected != actual) { + passed = false; + throw `Expected ${expected}, received ${actual}`; + } +} + +function testLoadOpsForType(funcname, module, laneValues, expectedResults, startPositions) { + + let memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); + const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports; + + let intArray = new Int32Array (memObj.buffer); + + let forEachTestPosition = (action) => { + + for (const pos of startPositions) { + for (let i = 0; i < 4; i++) { + action(pos, i); + } + } + }; + + forEachTestPosition ((pos, i) => {intArray[pos + i] = laneValues[i];}); + instance[funcname](0); + forEachTestPosition((pos, i) => {assertEquals(intArray[pos + i], expectedResults[i]);}); + + const MEM_SIZE_IN_BYTES = 1024 * 64; + + let check = function(expected, funName, ...args) { + let fun = eval(funName); + var result; + try { + result = fun(...args); + + } + catch (e) { + if (e.message === "Access index is out of range" || + e.message === "Simd typed array access: argument out of range" || + e.message === "argument out of range" || + e.message === "Memory index is out of range" + ) { + result = "Access index is out of range"; + } + else { + result = e.message; + } + } + + if(result != expected) + { + passed = false; + print(`${funName}(${[...args]}) produced ${result}, expected ${expected}`); + } + } + + check(0, "instance.m128_load4", MEM_SIZE_IN_BYTES - 32); + check(0, "instance.m128_load4", MEM_SIZE_IN_BYTES - 16); + check("Access 
index is out of range", "instance.m128_load4", MEM_SIZE_IN_BYTES - 8); + check("Access index is out of range", "instance.m128_load4", MEM_SIZE_IN_BYTES - 4); + check("Access index is out of range", "instance.m128_load4_offset", 0xFFFFFFFC); + check("Access index is out of range", "instance.m128_load4_offset", -1); + +} + +const INITIAL_SIZE = 1; +const module = new WebAssembly.Module(readbuffer('loads.wasm')); + +const laneValues = [0xAAAAAAAA, 0xFFFFFFFF, 0X80000000, 0x90A762A6]; +const expectedResults = [16, 32, 1, 14]; //i32.popcnt +const startPositions = [0, 5, 11, 17]; + +testLoadOpsForType("m128_load_test", module, laneValues, expectedResults,startPositions); + +if (passed) { + print("Passed"); +} \ No newline at end of file diff --git a/test/wasm.simd/loads.wasm b/test/wasm.simd/loads.wasm new file mode 100755 index 00000000000..c3d6c56e8ec Binary files /dev/null and b/test/wasm.simd/loads.wasm differ diff --git a/test/wasm.simd/loads.wast b/test/wasm.simd/loads.wast new file mode 100755 index 00000000000..0a96dd9d1e4 --- /dev/null +++ b/test/wasm.simd/loads.wast @@ -0,0 +1,54 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (import "dummy" "memory" (memory 1)) + + (func (export "m128_load4") (param $x i32) (result i32) + (i32x4.extract_lane lane=0 (m128.load offset=0 align=4 (get_local $x))) + ) + + (func (export "m128_load4_offset") (param $x i32) (result i32) + (i32x4.extract_lane lane=0 (m128.load offset=16 align=4 (get_local $x))) + ) + + (func (export "m128_load_test") (param $x i32) (local m128) + (set_local 1 (m128.load offset=0 align=4 (get_local $x))) + (i32.store offset=0 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) + (set_local 1 (m128.load offset=0 align=4 (get_local $x))) + (i32.store offset=4 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) + (set_local 1 (m128.load offset=0 align=4 (get_local $x))) + (i32.store offset=8 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) + (set_local 1 (m128.load offset=0 align=4 (get_local $x))) + (i32.store offset=12 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) + ;; + (set_local 1 (m128.load offset=20 align=4 (get_local $x))) + (i32.store offset=20 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) + (set_local 1 (m128.load offset=20 align=4 (get_local $x))) + (i32.store offset=24 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) + (set_local 1 (m128.load offset=20 align=4 (get_local $x))) + (i32.store offset=28 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) + (set_local 1 (m128.load offset=20 align=4 (get_local $x))) + (i32.store offset=32 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) + ;; + (set_local 1 (m128.load offset=44 align=4 (get_local $x))) + (i32.store offset=44 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) + (set_local 1 (m128.load offset=44 align=4 (get_local $x))) + (i32.store offset=48 (get_local 
$x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) + (set_local 1 (m128.load offset=44 align=4 (get_local $x))) + (i32.store offset=52 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) + (set_local 1 (m128.load offset=44 align=4 (get_local $x))) + (i32.store offset=56 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) + ;; + (set_local 1 (m128.load offset=68 align=4 (get_local $x))) + (i32.store offset=68 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) + (set_local 1 (m128.load offset=68 align=4 (get_local $x))) + (i32.store offset=72 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) + (set_local 1 (m128.load offset=68 align=4 (get_local $x))) + (i32.store offset=76 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) + (set_local 1 (m128.load offset=68 align=4 (get_local $x))) + (i32.store offset=80 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) + ) +) diff --git a/test/wasm.simd/logical.wasm b/test/wasm.simd/logical.wasm new file mode 100755 index 00000000000..b06dccceed4 Binary files /dev/null and b/test/wasm.simd/logical.wasm differ diff --git a/test/wasm.simd/logical.wast b/test/wasm.simd/logical.wast new file mode 100644 index 00000000000..5fd6f40c193 --- /dev/null +++ b/test/wasm.simd/logical.wast @@ -0,0 +1,56 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (import "dummy" "memory" (memory 1)) + + (func (export "m128_and") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (m128.and (get_local $v1) (get_local $v2))) + ) + + (func (export "m128_or") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (m128.or (get_local $v1) (get_local $v2))) + ) + + (func (export "m128_xor") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (m128.xor (get_local $v1) (get_local $v2))) + ) + + (func (export "m128_not") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (m128.not (get_local $v1))) + ) + + (func (export "i32x4_anytrue") (result i32) + (i32x4.any_true (m128.load offset=0 align=4 (i32.const 0))) + ) + + (func (export "i32x4_alltrue") (result i32) + (i32x4.all_true (m128.load offset=0 align=4 (i32.const 0))) + ) + + (func (export "i16x8_anytrue") (result i32) + (i16x8.any_true (m128.load offset=0 align=4 (i32.const 0))) + ) + + (func (export "i16x8_alltrue") (result i32) + (i16x8.all_true (m128.load offset=0 align=4 (i32.const 0))) + ) + + (func (export "i8x16_anytrue") (result i32) + (i8x16.any_true (m128.load offset=0 align=4 (i32.const 0))) + ) + + (func (export "i8x16_alltrue") (result i32) + (i8x16.all_true (m128.load offset=0 align=4 (i32.const 0))) + ) + +) diff --git a/test/wasm.simd/logicalTests.js b/test/wasm.simd/logicalTests.js new file mode 100755 index 00000000000..0391899385d --- /dev/null +++ 
b/test/wasm.simd/logicalTests.js @@ -0,0 +1,113 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. +//------------------------------------------------------------------------------------------------------- + +let passed = true; + +function assertEquals(expected, actual) { + if (expected != actual) { + passed = false; + throw `Expected ${expected}, received ${actual}`; + } +} + +const INITIAL_SIZE = 1; +const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); +const arrays = [new Int32Array (memObj.buffer), + new Int16Array (memObj.buffer), + new Int8Array (memObj.buffer), +]; + +const module = new WebAssembly.Module(readbuffer('logical.wasm')); +const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports; + +function moveArgsIntoArray(args, offset, arr) { + for (let i = 0; i < args.length; i++) { + arr[offset + i] = args[i]; + } +} + +let testIntLogicalOps = function (funcname, args1, args2, resultArr) { + + const len = args1.length; + const typeIndex = Math.log2(len) - 2; + let arr = arrays [typeIndex]; + + moveArgsIntoArray(args1, 0, arr); + moveArgsIntoArray(args2, len, arr); + moveArgsIntoArray(resultArr, 2 * len, arr); + + instance[funcname](); + + const resultOffset = 2 * len; + for (let i = 0; i < len; i++) { + assertEquals(arr[i], arr[resultOffset + i]); + } +} + +testIntLogicalOps("m128_and", [-1, 0x80007001, 0, 7], [-1, 0x80007001, 0, 7], [-1, 0x80007001, 0, 7]); +testIntLogicalOps("m128_and", [-1, 0x80000001, 0x80007001, 7], [0, 0x80000001, -1 ^ 0x80007001, 7], [0, 0x80000001, 0, 7]); + +testIntLogicalOps("m128_or", [-1, 0xFFFF0000, 0x80000001, 0x55555555], [0, 0x0000FFFF, 0x80000001 ^ -1, 0xaaaaaaaa], [-1, -1, -1, -1]); +testIntLogicalOps("m128_or", [0xFFFF0000, 0, 
0x80007001, 0], [0, 0, 0, 0x55555555], [0xFFFF0000, 0, 0x80007001, 0x55555555]); + +testIntLogicalOps("m128_xor", [-1, 0xFFFF0000, 0x80000001, 0x55555555], [0, 0x0000FFFF, 0x80000001 ^ -1, 0xaaaaaaaa], [-1, -1, -1, -1]); +testIntLogicalOps("m128_xor", [0xFFFF0000, 0x55555555, -1, 0], [0, 0xaaaaaaaa, 0xFFFF0000, 0x55555555], [0xFFFF0000, -1, 0x0000FFFF, 0x55555555]); + +testIntLogicalOps("m128_not", [-1, 0xFFFF0000, 0x80000001, 0x55555555], [0, 0, 0, 0] /*dummy*/, [0, 0x0000FFFF, 0x80000001 ^ -1, 0xaaaaaaaa]); + +let testBoolReduceOps = function (funcname, args1, expected) { + + const len = args1.length; + let typeIndex = Math.log2(len) - 2; + let arr = arrays [typeIndex]; + + moveArgsIntoArray(args1, 0, arr); + let result = instance[funcname](); + assertEquals(expected, result); +} + +testBoolReduceOps("i32x4_anytrue", [0, 0, 0, 0], false); +testBoolReduceOps("i32x4_anytrue", [0, 0, 0, 0xFFFFFFFF], true); +testBoolReduceOps("i32x4_anytrue", [0xFFFFFFFF, 0, 0, 0], true); +testBoolReduceOps("i32x4_anytrue", [0x80000000, 0, 0, 0], true); +testBoolReduceOps("i32x4_anytrue", [0, 0, 0, 0x10], true); + +testBoolReduceOps("i32x4_alltrue", [0, 0, 0, 0], false); +testBoolReduceOps("i32x4_alltrue", [0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0], false); +testBoolReduceOps("i32x4_alltrue", [0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF], true); +testBoolReduceOps("i32x4_alltrue", [0x10, 0x4, 0x8, 0x1], true); +testBoolReduceOps("i32x4_alltrue", [0x80000000, 0x40000000, 0x20000000, 0x10000000], true); +testBoolReduceOps("i32x4_alltrue", [0x80000000, 0x40000000, 0x20000000, 0], false); + +testBoolReduceOps("i16x8_anytrue", [0, 0, 0, 0, 0, 0, 0, 0], false); +testBoolReduceOps("i16x8_anytrue", [0, 0, 0, 0, 0, 0, 0xFFFF, 0], true); +testBoolReduceOps("i16x8_anytrue", [0, 0, 0xFFFF, 0, 0, 0, 0, 0], true); +testBoolReduceOps("i16x8_anytrue", [0, 0, 1, 0, 0, 0, 0, 0], true); +testBoolReduceOps("i16x8_anytrue", [0, 0, 1, 0, 0, 0, 0x10, 0], true); +testBoolReduceOps("i16x8_anytrue", [0, 0, 0, 
0, 0, 0, 0, 0x800], true); + +testBoolReduceOps("i16x8_alltrue", [0, 0, 0, 0, 0, 0, 0, 0], false); +testBoolReduceOps("i16x8_alltrue", [0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF], false); +testBoolReduceOps("i16x8_alltrue", [0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF], true); +testBoolReduceOps("i16x8_alltrue", [0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF], true); +testBoolReduceOps("i16x8_alltrue", [0x8000, 0x4000, 0x2000, 0x1000, 0x800, 0x400, 0x200, 0x100], true); +testBoolReduceOps("i16x8_alltrue", [0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1], true); +testBoolReduceOps("i16x8_alltrue", [0x80, 0x40, 0x20, 0x10, 0x8, 0, 0x2, 0x1], false); + +testBoolReduceOps("i8x16_anytrue", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], false); +testBoolReduceOps("i8x16_anytrue", [0, 0, 0, 0, 0, 0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0], true); +testBoolReduceOps("i8x16_anytrue", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0, 0], true); +testBoolReduceOps("i8x16_anytrue", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0, 0, 0, 0, 0], true); +testBoolReduceOps("i8x16_anytrue", [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], true); + +testBoolReduceOps("i8x16_alltrue", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], false); +testBoolReduceOps("i8x16_alltrue", [0xFF, 0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF], false); +testBoolReduceOps("i8x16_alltrue", [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF], true); +testBoolReduceOps("i8x16_alltrue", [0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 3, 5, 7, 9, 11, 87, 42, 1], true); +testBoolReduceOps("i8x16_alltrue", [0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0, 0x1, 3, 5, 7, 9, 11, 87, 42, 1], false); + +if (passed) { + print("Passed"); +} diff --git a/test/wasm.simd/math.wasm b/test/wasm.simd/math.wasm new file mode 100755 index 00000000000..257fc5994c8 Binary files /dev/null and b/test/wasm.simd/math.wasm 
differ diff --git a/test/wasm.simd/math.wast b/test/wasm.simd/math.wast new file mode 100755 index 00000000000..d642ac0d719 --- /dev/null +++ b/test/wasm.simd/math.wast @@ -0,0 +1,334 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. +;;------------------------------------------------------------------------------------------------------- + +(module + (import "dummy" "memory" (memory 1)) + + (func (export "func_i8x16_shuffle_test0") + (local $v1 m128) (local $v2 m128) + + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + + (m128.store offset=0 (i32.const 0) + (v8x16.shuffle + (get_local $v1) (get_local $v2) + (i32.const 31) (i32.const 30) (i32.const 29) (i32.const 28) + (i32.const 1) (i32.const 17) (i32.const 2) (i32.const 19) + (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) + (i32.const 21) (i32.const 20) (i32.const 11) (i32.const 10) + ) + ) + ) + + (func (export "func_i8x16_shuffle_test1") + (local $v1 m128) (local $v2 m128) + + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + + (m128.store offset=0 (i32.const 0) + (v8x16.shuffle + (get_local $v1) (get_local $v2) + (i32.const 16) (i32.const 17) (i32.const 18) (i32.const 19) + (i32.const 20) (i32.const 21) (i32.const 22) (i32.const 23) + (i32.const 0) (i32.const 1) (i32.const 2) (i32.const 3) + (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) + ) + ) + ) + + (func (export "func_i8x16_shuffle_test2") + (local $v1 m128) (local $v2 m128) + + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + + (m128.store offset=0 (i32.const 0) + (v8x16.shuffle 
+ (get_local $v1) (get_local $v2) + (i32.const 0) (i32.const 17) (i32.const 1) (i32.const 18) + (i32.const 2) (i32.const 19) (i32.const 3) (i32.const 20) + (i32.const 4) (i32.const 21) (i32.const 5) (i32.const 22) + (i32.const 6) (i32.const 23) (i32.const 7) (i32.const 24) + ) + ) + ) + + (func (export "func_i32x4_bitselect") (local $v1 m128) (local $v2 m128) (local $mask m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (set_local $mask (m128.load offset=0 align=4 (i32.const 32))) + (m128.store offset=0 (i32.const 0) (m128.bitselect (get_local $v1) (get_local $v2) (get_local $mask))) + ) + + (func (export "func_i32x4_add") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i32x4.add (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i32x4_sub") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i32x4.sub (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i32x4_mul") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i32x4.mul (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i32x4_shl") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i32x4.shl (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i32x4_shr_s") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load 
offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i32x4.shr_s (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i32x4_shr_u") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i32x4.shr_u (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i16x8_add") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.add (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i16x8_addsaturate_s") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.addsaturate_s (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i16x8_addsaturate_u") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.addsaturate_u (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i16x8_sub") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.sub (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i16x8_subsaturate_s") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.subsaturate_s (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i16x8_subsaturate_u") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load 
offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.subsaturate_u (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i16x8_mul") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.mul (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i16x8_shl") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.shl (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i16x8_shr_s") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.shr_s (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i16x8_shr_u") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i16x8.shr_u (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i8x16_add") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.add (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i8x16_addsaturate_s") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.addsaturate_s (get_local $v1) (get_local $v2))) + ) + + (func (export 
"func_i8x16_addsaturate_u") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.addsaturate_u (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i8x16_sub") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.sub (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i8x16_subsaturate_s") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.subsaturate_s (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i8x16_subsaturate_u") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.subsaturate_u (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i8x16_mul") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.mul (get_local $v1) (get_local $v2))) + ) + + (func (export "func_i8x16_shl") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.shl (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i8x16_shr_s") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) 
(i8x16.shr_s (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_i8x16_shr_u") (param $shamt i32) (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (i8x16.shr_u (get_local $v1) (get_local $shamt))) + ) + + (func (export "func_f32x4_add") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f32x4.add (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f32x4_sub") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f32x4.sub (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f32x4_mul") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f32x4.mul (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f32x4_div") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f32x4.div (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f32x4_min") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f32x4.min (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f32x4_max") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f32x4.max 
(get_local $v1) (get_local $v2))) + ) + + (func (export "func_f32x4_abs") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (f32x4.abs (get_local $v1))) + ) + + (func (export "func_f32x4_sqrt") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (f32x4.sqrt (get_local $v1))) + ) + + (func (export "func_f64x2_add") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f64x2.add (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f64x2_sub") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f64x2.sub (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f64x2_mul") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f64x2.mul (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f64x2_div") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f64x2.div (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f64x2_min") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 (i32.const 0) (f64x2.min (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f64x2_max") (local $v1 m128) (local $v2 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (set_local $v2 (m128.load offset=0 align=4 (i32.const 16))) + 
(m128.store offset=0 (i32.const 0) (f64x2.max (get_local $v1) (get_local $v2))) + ) + + (func (export "func_f64x2_abs") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (f64x2.abs (get_local $v1))) + ) + + (func (export "func_f64x2_sqrt") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (f64x2.sqrt (get_local $v1))) + ) + + (func (export "func_i64x2_trunc_s") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i64x2.trunc_s (get_local $v1))) + ) + + (func (export "func_i64x2_trunc_u") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i64x2.trunc_u (get_local $v1))) + ) + + (func (export "func_f64x2_convert_s") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (f64x2.convert_s (get_local $v1))) + ) + + (func (export "func_f64x2_convert_u") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (f64x2.convert_u (get_local $v1))) + ) +) diff --git a/test/wasm.simd/mathTests.js b/test/wasm.simd/mathTests.js new file mode 100755 index 00000000000..c953aff4efc --- /dev/null +++ b/test/wasm.simd/mathTests.js @@ -0,0 +1,415 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//------------------------------------------------------------------------------------------------------- +let passed = true; + +function assertEquals(expected, actual) { + if (expected != actual) { + passed = false; + throw `Expected ${expected}, received ${actual}`; + } +} + +const INITIAL_SIZE = 1; +const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); +const module = new WebAssembly.Module(readbuffer('math.wasm')); +const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports; +const arrays = { + "i32x4" : new Int32Array (memObj.buffer), + "i16x8" : new Int16Array (memObj.buffer), + "i8x16" : new Int8Array (memObj.buffer), + "f32x4" : new Float32Array (memObj.buffer), + "f64x2" : new Float64Array (memObj.buffer), + "i64x2" : new Int32Array (memObj.buffer) +}; + +function moveArgsIntoArray(args, offset, arr) { + for (let i = 0; i < args.length; i++) { + arr[offset + i] = args[i]; + } +} + +let testCompOps = function (funcname, args1, args2, op, resultArr) { + + const len = args1.length; + const arr = arrays[funcname.split('_')[1]]; + + moveArgsIntoArray(args1, 0, arr); + moveArgsIntoArray(args2, len, arr); + instance[funcname](op); + for (let i = 0; i < len; i++) { + assertEquals(resultArr[i], Number.isNaN(arr[i]) || !!arr[i]); + } +} + +let testMathOps = function (funcname, args1, args2, resultArr) { + + const len = args1.length; + const type = funcname.split('_')[1]; + const arr = arrays[type]; + + moveArgsIntoArray(args1, 0, arr); + if (Array.isArray(args2)) { //binary ops + moveArgsIntoArray(args2, len, arr); + instance[funcname](); + } + else if (Number.isInteger(args2)) { + instance[funcname](args2); //shift amount for shl/shr + } + else { + instance[funcname](); //unary ops + } + + for (let i = 0; i < resultArr.length; i++) { + if ((type === "f32x4" || type === "f64x2") && Number.isNaN(resultArr[i])) { + assertEquals(true, Number.isNaN(arr[i])); + } else { + assertEquals(resultArr[i], arr[i]); + } + + } +} + 
+let reverse64x2Type = (type) => type === "f64x2" ? "i64x2" : "f64x2"; +let testTruncConvOps = function (funcname, args1, resultArr) { + + const len = args1.length; + const type = funcname.split('_')[1]; + const resultType = reverse64x2Type(type); + const arr = arrays[resultType]; + const resultArrView = arrays[type]; + + moveArgsIntoArray(args1, 0, arr); + instance[funcname](); + + for (let i = 0; i < resultArr.length; i++) { + if (type === "f64x2" && Number.isNaN(resultArr[i])) { + assertEquals(true, Number.isNaN(resultArrView[i])); + } else { + assertEquals(resultArr[i], resultArrView[i]); + } + + } +} + +//i32x4 +testMathOps("func_i32x4_add", + [2147483645 , 0, -1, 65536], + [3 , 0, 1 , 65536], + [-2147483648, 0, 0 , 131072] +); + +testMathOps("func_i32x4_sub", + [-2147483648, 0 , 65536, 32768], + [1 , -1, 65536, 65536], + [2147483647 , 1 , 0 , -32768] +); + +testMathOps("func_i32x4_mul", + [65536, 65536 , 0 , 1], + [65536, 65535 , 2147483647, -2147483648], + [0 , -65536, 0 , -2147483648] +); + +testMathOps("func_i32x4_shl", [1,2,1,0], 30, [1073741824, -2147483648, 1073741824, 0]); +testMathOps("func_i32x4_shl", [1,2,8,16], 32, [1,2,8,16]); + +testMathOps("func_i32x4_shr_s", [-2147483648,0x80010000,0x10000,1], 16, [-32768,-32767,1,0]); +testMathOps("func_i32x4_shr_u", [-2147483648,0x80010000,0x10000,1], 16, [32768,32769,1,0]); + +//i16x8 +testMathOps("func_i16x8_add", + [32767 , -32768, -1, 16, 32767 , -32768, 0, 0], + [1 , -1 , 1 , 16, 2 , -2 , 0, 32767], + [-32768, 32767 , 0 , 32, -32767, 32766 , 0, 32767] +); + +testMathOps("func_i16x8_addsaturate_s", + [32767, 32767, -1 , -32768, 32767, -32768, 0 , 0], + [1 , 32767, -32768, -32768, -2 , 0 , 32767, -1], + [32767, 32767, -32768, -32768, 32765, -32768, 32767, -1] +); + +testMathOps("func_i16x8_subsaturate_s", + [32767, 32767 , -2 , -32768, 32767, -32768, 0 , 0], + [-1 , -32767, 32767 , 32767, 2 , 0 , -32767, -1], + [32767, 32767 , -32768, -32768, 32765, -32768, 32767 , 1] +); + 
+testMathOps("func_i16x8_addsaturate_u", + [65535, 65535, 65535, 32768, 32767 , 0, 0 , 32768], + [1 , 2 , 0 , 32768, 2 , 0, 32768 , 1], + [-1 , -1 , -1 , -1 , -32767, 0, -32768, -32767] +); + +testMathOps("func_i16x8_subsaturate_u", + [0 , 65535, 65535, 32768, 32767, 0, 65535, 65535], + [65535, 2 , 0 , 32768, 32768, 0, 32768, 32767], + [0 , -3 , -1 , 0 , 0, 0, 32767, -32768] +); + +testMathOps("func_i16x8_sub", + [-1 , 32767 , 1, 16, 32767 , -32768, 0, 0], + [32768,-1 , 1, 16, -2 , 2 , 0, 32767], + [32767,-32768 , 0, 0 , -32767, 32766 , 0, -32767] +); + +testMathOps("func_i16x8_mul", + [256, 128 , 1 , -32768, -32768, -32768, -128 , 0], + [256, 256 , -32767, 0 , -32768, 2 , -128 , 0], + [0 ,-32768, -32767, 0 , 0 , 0 , 16384, 0] +); + +testMathOps("func_i16x8_shl", [1, 2, 3, 0, 4, 5, 6, 7], 15, [-32768, 0, -32768, 0, 0, -32768, 0, -32768]); +testMathOps("func_i16x8_shl", [1, 2, 3, 0, 2048, 128, 256, 512], 4, [16, 32, 48, 0, -32768, 2048, 4096, 8192]); + +testMathOps("func_i16x8_shr_s", [0x8000, 0x8100, 256, 1, 0x8000, 0x8101, 257, 0], 8, [-128, -127, 1, 0, -128, -127, 1, 0]); +testMathOps("func_i16x8_shr_u", [0x8000, 0x8100, 256, 1, 0x8000, 0x8101, 257, 0], 8, [128, 129, 1, 0, 128, 129, 1, 0]); + +//i8x16 +testMathOps("func_i8x16_add", + [127 , -128, -1, 16, 127 , -128, 0, 0 , 127 , -128, -1, 16, 127 , -128, 0, 0], + [1 , -1 , 1 , 16, 2 , -2 , 0, 127, 1 , -1 , 1 , 16, 2 , -2 , 0, 127], + [-128, 127 , 0 , 32, -127, 126 , 0, 127, -128, 127 , 0 , 32, -127, 126 , 0, 127] +); + +testMathOps("func_i8x16_addsaturate_s", + [127, 127, -1 , -128, 127, -128, 0 , 0 , 127, 127, -1 , -128, 127, -128, 0 , 0 ], + [1 , 127, -128, -128, -2 , 0 , 127, -1, 1 , 127, -128, -128, -2 , 0 , 127, -1], + [127, 127, -128, -128, 125, -128, 127, -1, 127, 127, -128, -128, 125, -128, 127, -1] +); + +testMathOps("func_i8x16_subsaturate_s", + [127, 127 , -2 , -128, 127, -128, 0 , 0 , 127, 127 , -2 , -128, 127, -128, 0 , 0], + [-1 , -127, 127 , 127, 2 , 0 , -127, -1, -1 , -127, 127 , 127, 2 , 0 , 
-127, -1], + [127, 127 , -128, -128, 125, -128, 127 , 1 , 127, 127 , -128, -128, 125, -128, 127 , 1] +); + +testMathOps("func_i8x16_addsaturate_u", + [255, 255, 255, 128, 127 , 0, 0 , 128 , 255, 255, 255, 128, 127 , 0, 0 , 128 ], + [1 , 2 , 0 , 128, 2 , 0, 128 , 1 , 1 , 2 , 0 , 128, 2 , 0, 128 , 1], + [-1 , -1 , -1 , -1 , -127, 0, -128, -127, -1 , -1 , -1 , -1 , -127, 0, -128, -127] +); + +testMathOps("func_i8x16_subsaturate_u", + [0 , 255, 255, 128, 127, 0, 255, 255 , 0 , 255, 255, 128, 127, 0, 255, 255], + [255, 2 , 0 , 128, 128, 0, 128, 127 , 255, 2 , 0 , 128, 128, 0, 128, 127], + [0 , -3 , -1 , 0 , 0 , 0, 127, -128, 0 , -3 , -1 , 0 , 0 , 0, 127, -128] +); + +testMathOps("func_i8x16_sub", + [-1 , 127 , 1, 16, 127 , -128 , 0, 0 , -1 , 127 , 1, 16, 127 , -128 , 0, 0], + [128 ,-1 , 1, 16, -2 , 2 , 0, 127 , 128 ,-1 , 1, 16, -2 , 2 , 0, 127], + [127 ,-128 , 0, 0 , -127 , 126 , 0, -127, 127 ,-128 , 0, 0 , -127 , 126 , 0, -127] +); + +testMathOps("func_i8x16_mul", + [16, 8 , 1 , -128, -128, -128, -8 , 12 , 16, 8 , 1 , -128, -128, -128, -8 , 12], + [16, 16 , -127, 0 , -128, 2 , -8 , 10 , 16, 16 , -127, 0 , -128, 2 , -8 , 10], + [0 ,-128, -127, 0 , 0 , 0 , 64 , 120, 0 ,-128, -127, 0 , 0 , 0 , 64 , 120] +); + +testMathOps("func_i8x16_shl", [1, 2, 3, 0, 4, 5, 6, 7, 1, 2, 3, 0, 4, 5, 6, 16], 4, [16, 32, 48, 0, 64, 80, 96, 112, 16, 32, 48, 0, 64, 80, 96, 0]); +testMathOps("func_i8x16_shr_s", [-128, -127, 16, 1, -128, -127, 16, -64, -128, -127, 16, 1, -128, -127, 16, 1], 4, [-8, -8, 1, 0, -8, -8, 1, -4, -8, -8, 1, 0, -8, -8, 1, 0]); +testMathOps("func_i8x16_shr_u", [128, 192, 16, 1, 128, 193, 17, 0, 128, 192, 16, 1, 128, 193, 17, 0], 4, [8, 12, 1, 0, 8, 12, 1, 0, 8, 12, 1, 0, 8, 12, 1, 0]); + +//f32x4 +testMathOps("func_f32x4_add", + [400000512, 400000256, 1.4E-45 , -1], + [400000505, 400000100, 1.4E-45 , Number.NaN], + [800001024, 800000384, 2.802596928649634e-45, Number.NaN] +); + +testMathOps("func_f32x4_sub", + [800001024, 800000256, 2.802596928649634e-45, 1], + 
[400000512, 400000128, 1.4E-45 , Number.NaN], + [400000512, 400000128, 1.401298464324817e-45, Number.NaN] +); + +testMathOps("func_f32x4_mul", + [400000512 , 800000256 , 2.802596928649634e-45, 1], + [400000512 , 400000128 , 1.4E-45 , Number.NaN], + [160000416677888000, 320000214880485400, 0 , Number.NaN] +); + +testMathOps("func_f32x4_div", + [400000512, 800000256, 2.802596928649634e-45, 1], + [400000512, 400000128, 1.4E-45 , Number.NaN], + [1 , 2 , 2 , Number.NaN] +); + +testMathOps("func_f32x4_abs", + [-800001024, 0, 2.802596928649634e-45, -1.401298464324817e-45], + null, + [800001024, 0 , 2.802596928649634e-45, 1.401298464324817e-45] +); + +testMathOps("func_f32x4_sqrt", + [1 << 20, 0, 6.25, -1], + null, + [1 << 10, 0, 2.5 , Number.NaN] +); + +//f64x2 +testMathOps("func_f64x2_add", + [1.34826985114673713038100984656E308, 2.7670146896890036224E19], + [1.34826985114673693079697889309E308, 1.288491622400006103515625E10], + [Number.POSITIVE_INFINITY , 27670146909774954000] +); + +testMathOps("func_f64x2_add", + [1.4E-45 , -1], + [1.4E-45 , Number.NaN], + [2.8e-45, Number.NaN] +); + +testMathOps("func_f64x2_add", + [400000512, 400000256], + [400000505, 400000100], + [800001017, 800000356] +); + +testMathOps("func_f64x2_sub", + [1.34826985114673713038100984656E308, 2.7670146896890036224E19], + [1.34826985114673693079697889309E308, 1.288491622400006103515625E10], + [1.99584030953472e+292 , 27670146884005120000] +); + +testMathOps("func_f64x2_sub", + [2.802596928649634e-45, 1], + [1.4E-45 , Number.NaN], + [1.402596928649634e-45, Number.NaN] +); + +testMathOps("func_f64x2_sub", + [800001024, 800000256], + [400000512, 400000128], + [400000512, 400000128] +); + +testMathOps("func_f64x2_mul", + [1.34826985114673713038100984656E308, 2.7670146896890036224E19], + [1.34826985114673693079697889309E308, 1.288491622400006103515625E10], + [Number.POSITIVE_INFINITY , 3.565275246722034e+29] +); + +testMathOps("func_f64x2_mul", + [400000512 , 800000256 ], + [400000512 , 400000128 ], + 
[160000409600262140, 320000204800032800] +); + +testMathOps("func_f64x2_div", + [-4.27214248753826131799357622933E-306, 1], + [3 , Number.NaN], + [-1.424047495846087e-306 , Number.NaN] +); + +testMathOps("func_f64x2_div", + [400000512, 800000256], + [400000512, 400000128], + [1 , 2 ] +); + +testMathOps("func_f64x2_abs", + [-800001024, -4.27214248753826131799357622933E-306], + null, + [800001024 , 4.27214248753826131799357622933E-306] +); + +testMathOps("func_f64x2_abs", + [-Math.pow(120), -0], + null, + [Math.pow(120) , 0] +); + +testMathOps("func_f64x2_sqrt", + [Math.pow(2, 40), 0], + null, + [Math.pow(2, 20), 0] +); + +testMathOps("func_f64x2_sqrt", + [6.25, -1], + null, + [2.5 , Number.NaN] +); + +//bitselect +testMathOps("func_i32x4_bitselect", + [0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0, 0, 0, 0, + 0xF0F0F0F0, 0xFFFFFFFF, 0xD6D6D6D6, 0xAAAAAAAA], + null, + [0xF0F0F0F0|0, 0xFFFFFFFF|0, 0xD6D6D6D6|0, 0xAAAAAAAA|0] +); + +testMathOps("func_i32x4_bitselect", + [0, 0, 0, 0, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + ~0xF0F0F0F0, ~0xFFFFFFFF, ~0xD6D6D6D6, ~0xAAAAAAAA], + null, + [0xF0F0F0F0|0, 0xFFFFFFFF|0, 0xD6D6D6D6|0, 0xAAAAAAAA|0] +); + +testMathOps("func_i32x4_bitselect", + [0xBEBEBEBE, 0xD7D7D7D7, 0xF3F3F3F3, 0xFFFFFFFF, + 0x55555555, 0x29292929, 0x0F0F0F0F, 0, + 0xAAAAAAAA, 0xD6D6D6D6,0xF0F0F0F0, 0], + null, + [-1,-1,-1,0] +); + +//shuffle +testMathOps("func_i8x16_shuffle_test1", + [0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15], + [16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31], + [16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7] +); + +testMathOps("func_i8x16_shuffle_test0", + [0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15], + [16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31], + [31 , 30 , 29 , 28 , 1 , 17 , 2 , 19 , 3 , 4 , 5 , 6 , 21 , 20 , 11 , 10] +); + +testMathOps("func_i8x16_shuffle_test2", + [0 , 1 , 2 , 
3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15], + [16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31], + [0 , 17 , 1 , 18 , 2 , 19 , 3 , 20 , 4 , 21 , 5 , 22 , 6 , 23 , 7 , 24] +); + +testTruncConvOps("func_f64x2_convert_u", + [0xffffffff|0, 0xffffffff|0, 0, 0], + [1.8446744073709552e+19, 0] +); + +testTruncConvOps("func_f64x2_convert_s", + [0xffffffff|0, 0x7fffffff|0, 0x0|0, 0x80000000|0], + [9.2233720368547758e+18, -9.2233720368547758e+18] +); + +testTruncConvOps("func_i64x2_trunc_u", + [1.8446744073709553e+19, -1], + [0xffffffff|0, 0xffffffff|0, 0, 0] +); + +testTruncConvOps("func_i64x2_trunc_u", + [12345.6, Number.NaN], + [12345, 0, 0, 0] +); + +testTruncConvOps("func_i64x2_trunc_s", + [9.2233720368547759e+18, -9.2233720368547759e+18], + [0xffffffff|0, 0x7fffffff|0, 0x0|0, 0x80000000|0] +); + +testTruncConvOps("func_i64x2_trunc_s", + [-12345.6, Number.NaN], + [-12345, -1, 0, 0] +); + +if (passed) { + print("Passed"); +} diff --git a/test/wasm.simd/neg.wasm b/test/wasm.simd/neg.wasm new file mode 100755 index 00000000000..edf567c8b8b Binary files /dev/null and b/test/wasm.simd/neg.wasm differ diff --git a/test/wasm.simd/neg.wast b/test/wasm.simd/neg.wast new file mode 100644 index 00000000000..a33fb8a4263 --- /dev/null +++ b/test/wasm.simd/neg.wast @@ -0,0 +1,38 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;-------------------------------------------------------------------------------------------------------
+
+;; Negation test module. Every export follows the same pattern: load an m128 value from the
+;; address in parameter 0, apply <type>.neg to it, and store the result back to that same
+;; address. The second parameter is declared but never read by any of the bodies.
+(module
+    ;; The memory is supplied by the host under "dummy"."memory".
+    (import "dummy" "memory" (memory 1))
+
+    (func (export "i32x4_neg") (param i32 i32) (local m128)
+        (set_local 2 (m128.load offset=0 align=4 (get_local 0)))
+        (set_local 2 (i32x4.neg (get_local 2)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+
+    (func (export "i16x8_neg") (param i32 i32) (local m128)
+        (set_local 2 (m128.load offset=0 align=4 (get_local 0)))
+        (set_local 2 (i16x8.neg (get_local 2)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+
+    (func (export "i8x16_neg") (param i32 i32) (local m128)
+        (set_local 2 (m128.load offset=0 align=4 (get_local 0)))
+        (set_local 2 (i8x16.neg (get_local 2)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+
+    (func (export "f32x4_neg") (param i32 f32) (local m128)
+        (set_local 2 (m128.load offset=0 align=4 (get_local 0)))
+        (set_local 2 (f32x4.neg (get_local 2)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+
+    ;; NOTE(review): the second parameter is declared f32 here although this is the f64x2
+    ;; variant. It is unused, so behavior is unaffected; confirm against the compiled
+    ;; neg.wasm before changing the declared type.
+    (func (export "f64x2_neg") (param i32 f32) (local m128)
+        (set_local 2 (m128.load offset=0 align=4 (get_local 0)))
+        (set_local 2 (f64x2.neg (get_local 2)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+)
diff --git a/test/wasm.simd/replace.wasm b/test/wasm.simd/replace.wasm
new file mode 100755
index 00000000000..6a5dc0cc7b5
Binary files /dev/null and b/test/wasm.simd/replace.wasm differ
diff --git a/test/wasm.simd/replace.wast b/test/wasm.simd/replace.wast
new file mode 100755
index 00000000000..83ff72e15e7
--- /dev/null
+++ b/test/wasm.simd/replace.wast
@@ -0,0 +1,212 @@
+;;-------------------------------------------------------------------------------------------------------
+;; Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+;;-------------------------------------------------------------------------------------------------------
+
+;; replace_lane test module. There is one export per (type, lane) pair, named
+;; "<type>_replace<lane>". Each one splats parameter 0 into an m128 local, replaces the
+;; lane fixed in the instruction's lane= immediate with parameter 1, and stores the
+;; resulting vector at memory address 0.
+(module
+    ;; The memory is supplied by the host under "dummy"."memory".
+    (import "dummy" "memory" (memory 1))
+
+    ;; i32x4: lanes 0-3
+    (func (export "i32x4_replace0") (param i32 i32) (local m128)
+        (set_local 2 (i32x4.splat (get_local 0)))
+        (set_local 2 (i32x4.replace_lane lane=0 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i32x4_replace1") (param i32 i32) (local m128)
+        (set_local 2 (i32x4.splat (get_local 0)))
+        (set_local 2 (i32x4.replace_lane lane=1 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i32x4_replace2") (param i32 i32) (local m128)
+        (set_local 2 (i32x4.splat (get_local 0)))
+        (set_local 2 (i32x4.replace_lane lane=2 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i32x4_replace3") (param i32 i32) (local m128)
+        (set_local 2 (i32x4.splat (get_local 0)))
+        (set_local 2 (i32x4.replace_lane lane=3 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    ;; i16x8: lanes 0-7
+    (func (export "i16x8_replace0") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 0)))
+        (set_local 2 (i16x8.replace_lane lane=0 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i16x8_replace1") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 0)))
+        (set_local 2 (i16x8.replace_lane lane=1 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i16x8_replace2") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 0)))
+        (set_local 2 (i16x8.replace_lane lane=2 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i16x8_replace3") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 0)))
+        (set_local 2 (i16x8.replace_lane lane=3 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i16x8_replace4") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 0)))
+        (set_local 2 (i16x8.replace_lane lane=4 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i16x8_replace5") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 0)))
+        (set_local 2 (i16x8.replace_lane lane=5 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i16x8_replace6") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 0)))
+        (set_local 2 (i16x8.replace_lane lane=6 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i16x8_replace7") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 0)))
+        (set_local 2 (i16x8.replace_lane lane=7 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    ;; i8x16: lanes 0-15
+    (func (export "i8x16_replace0") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=0 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace1") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=1 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace2") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=2 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace3") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=3 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace4") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=4 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace5") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=5 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace6") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=6 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace7") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=7 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace8") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=8 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace9") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=9 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace10") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=10 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace11") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=11 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace12") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=12 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace13") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=13 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace14") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=14 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "i8x16_replace15") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 0)))
+        (set_local 2 (i8x16.replace_lane lane=15 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    ;; f32x4: lanes 0-3
+    (func (export "f32x4_replace0") (param f32 f32) (local m128)
+        (set_local 2 (f32x4.splat (get_local 0)))
+        (set_local 2 (f32x4.replace_lane lane=0 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "f32x4_replace1") (param f32 f32) (local m128)
+        (set_local 2 (f32x4.splat (get_local 0)))
+        (set_local 2 (f32x4.replace_lane lane=1 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "f32x4_replace2") (param f32 f32) (local m128)
+        (set_local 2 (f32x4.splat (get_local 0)))
+        (set_local 2 (f32x4.replace_lane lane=2 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "f32x4_replace3") (param f32 f32) (local m128)
+        (set_local 2 (f32x4.splat (get_local 0)))
+        (set_local 2 (f32x4.replace_lane lane=3 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    ;; f64x2: lanes 0-1
+    (func (export "f64x2_replace0") (param f64 f64) (local m128)
+        (set_local 2 (f64x2.splat (get_local 0)))
+        (set_local 2 (f64x2.replace_lane lane=0 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+
+    (func (export "f64x2_replace1") (param f64 f64) (local m128)
+        (set_local 2 (f64x2.splat (get_local 0)))
+        (set_local 2 (f64x2.replace_lane lane=1 (get_local 2) (get_local 1)))
+        (m128.store offset=0 align=4 (i32.const 0) (get_local 2))
+    )
+)
diff --git a/test/wasm.simd/replaceLaneTests.js b/test/wasm.simd/replaceLaneTests.js
new file mode 100755
index 00000000000..0fe11979be6
--- /dev/null
+++ b/test/wasm.simd/replaceLaneTests.js
@@ -0,0 +1,71 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//------------------------------------------------------------------------------------------------------- + +let passed = true; + +function assertEquals(expected, actual) { + if (expected != actual) { + passed = false; + throw `Expected ${expected}, received ${actual}`; + } +} + +const INITIAL_SIZE = 1; +const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); +const arrays = { + "i32x4" : { arr : new Int32Array (memObj.buffer) , len : 4 } , + "i16x8" : { arr : new Int16Array (memObj.buffer) , len : 8 } , + "i8x16" : { arr : new Int8Array (memObj.buffer) , len : 16 } , + "f32x4" : { arr : new Float32Array (memObj.buffer) , len : 4 } , + "f64x2" : { arr : new Float64Array (memObj.buffer) , len : 2 } +}; + +const module = new WebAssembly.Module(readbuffer('replace.wasm')); +const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports; + +let testIntFloatReplace = function (funcname, splat, val) { + + const type = funcname.split('_')[0]; + const arr = arrays [type].arr; + const len = arrays [type].len; + + for (let i = 0; i < len; i++) { + instance[funcname+i](splat, val); + for (let j = 0; j < len; j++) { + if (i != j) { + assertEquals(splat, arr[j]); + } + else { + assertEquals(val, arr[j]); + } + } + } +} + +testIntFloatReplace("i32x4_replace", -1, -2147483648); +testIntFloatReplace("i32x4_replace", 7, -1); +testIntFloatReplace("i32x4_replace", 0, 2147483647); + +testIntFloatReplace("i16x8_replace", -1, -32768); +testIntFloatReplace("i16x8_replace", 1001, -1); +testIntFloatReplace("i16x8_replace", 0, 32767); + +testIntFloatReplace("i8x16_replace", -1, -128); +testIntFloatReplace("i8x16_replace", 100, -1); +testIntFloatReplace("i8x16_replace", 0, 127); + +testIntFloatReplace("f32x4_replace", Number.NEGATIVE_INFINITY, 0.125); +testIntFloatReplace("f32x4_replace", 777.0, 1001.0); +testIntFloatReplace("f32x4_replace", -1.0, Number.POSITIVE_INFINITY); +testIntFloatReplace("f32x4_replace", -100.0, 1.7014118346046924e+38); + 
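+testIntFloatReplace("f64x2_replace", Number.NEGATIVE_INFINITY, 0.125);
+testIntFloatReplace("f64x2_replace", 777.0, 1001.0);
+testIntFloatReplace("f64x2_replace", -1.0, Number.POSITIVE_INFINITY);
+testIntFloatReplace("f64x2_replace", -100.0, 1.7014118346046924e+38);
+
+if (passed) {
+    print("Passed");
+}
diff --git a/test/wasm.simd/rlexe.xml b/test/wasm.simd/rlexe.xml
new file mode 100755
index 00000000000..0334755c096
--- /dev/null
+++ b/test/wasm.simd/rlexe.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="utf-8"?>
+<regress-exe>
+  <test>
+    <default>
+      <files>loadTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>storeTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>constTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>splatNegTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>replaceLaneTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>truncConvTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>logicalTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>comparisonTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>mathTests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>int64x2Tests.js</files>
+      <compile-flags>-wasm -wasmsimd</compile-flags>
+    </default>
+  </test>
+</regress-exe>
diff --git a/test/wasm.simd/splat.wasm b/test/wasm.simd/splat.wasm
new file mode 100755
index 00000000000..217d7adf082
Binary files /dev/null and b/test/wasm.simd/splat.wasm differ
diff --git a/test/wasm.simd/splat.wast b/test/wasm.simd/splat.wast
new file mode 100644
index 00000000000..2bae2fd9f7f
--- /dev/null
+++ b/test/wasm.simd/splat.wast
@@ -0,0 +1,33 @@
+;;-------------------------------------------------------------------------------------------------------
+;; Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.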
+;;-------------------------------------------------------------------------------------------------------
+
+;; Splat test module. Every export takes (address, scalar): it splats the scalar
+;; (parameter 1) into an m128 local and stores that vector at the address in parameter 0.
+;; The scalar parameter type follows the lane type: i32 for the integer vectors,
+;; f32 for f32x4 and f64 for f64x2.
+(module
+    ;; The memory is supplied by the host under "dummy"."memory".
+    (import "dummy" "memory" (memory 1))
+
+    (func (export "i32x4_splat") (param i32 i32) (local m128)
+        (set_local 2 (i32x4.splat (get_local 1)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+
+    (func (export "i16x8_splat") (param i32 i32) (local m128)
+        (set_local 2 (i16x8.splat (get_local 1)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+
+    (func (export "i8x16_splat") (param i32 i32) (local m128)
+        (set_local 2 (i8x16.splat (get_local 1)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+
+    (func (export "f32x4_splat") (param i32 f32) (local m128)
+        (set_local 2 (f32x4.splat (get_local 1)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+
+    (func (export "f64x2_splat") (param i32 f64) (local m128)
+        (set_local 2 (f64x2.splat (get_local 1)))
+        (m128.store offset=0 align=4 (get_local 0) (get_local 2))
+    )
+)
diff --git a/test/wasm.simd/splatNegTests.js b/test/wasm.simd/splatNegTests.js
new file mode 100755
index 00000000000..ab84b094613
--- /dev/null
+++ b/test/wasm.simd/splatNegTests.js
@@ -0,0 +1,132 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//------------------------------------------------------------------------------------------------------- + +let passed = true; + +function assertEquals(expected, actual) { + if (expected != actual) { + passed = false; + throw `Expected ${expected}, received ${actual}`; + } +} + +let check = function(expected, funName, ...args) { + let fun = eval(funName); + var result; + try { + result = fun(...args); + } + catch (e) { + result = e.name; + } + + if(result != expected) + { + passed = false; + print(`${funName}(${[...args]}) produced ${result}, expected ${expected}`); + } +} + +const INITIAL_SIZE = 1; +const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); +const arrays = { + "i32x4" : { arr : new Int32Array (memObj.buffer) , len : 4 } , + "i16x8" : { arr : new Int16Array (memObj.buffer) , len : 8 } , + "i8x16" : { arr : new Int8Array (memObj.buffer) , len : 16 } , + "f32x4" : { arr : new Float32Array (memObj.buffer) , len : 4 }, + "f64x2" : { arr : new Float64Array (memObj.buffer) , len : 2 } +}; + +//SPLAT +const splatModule = new WebAssembly.Module(readbuffer('splat.wasm')); +const splatInstance = new WebAssembly.Instance(splatModule, { "dummy" : { "memory" : memObj } }).exports; + +let testSplat = function (funcname, val) { + + const type = funcname.split('_')[0]; + const arr = arrays [type].arr; + const len = arrays [type].len; + + for (let i = 0; i < len; i++) { + arr[i] = val; + } + + splatInstance[funcname](0, val); + + for (let i = 0; i < len; i++) { + assertEquals (val, arr[i]); + } + +} + +testSplat("i32x4_splat", 0); +testSplat("i32x4_splat", -1); +testSplat("i32x4_splat", -2147483648); + +testSplat("i16x8_splat", 0); +testSplat("i16x8_splat", -1); +testSplat("i16x8_splat", 32766); + +testSplat("i8x16_splat", 0); +testSplat("i8x16_splat", -1); +testSplat("i8x16_splat", -128); + +testSplat("f32x4_splat", 0.125); +testSplat("f32x4_splat", 1001.0); +testSplat("f32x4_splat", Number.POSITIVE_INFINITY); +testSplat("f32x4_splat", 
1.7014118346046924e+38); + +testSplat("f64x2_splat", 0.125); +testSplat("f64x2_splat", 1001.0); +testSplat("f64x2_splat", Number.POSITIVE_INFINITY); +testSplat("f64x2_splat", 1.7014118346046924e+38); + +//NEG +const module = new WebAssembly.Module(readbuffer('neg.wasm')); +const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports; + + +let testNeg = function (funcname, val) { + + const type = funcname.split('_')[0]; + const arr = arrays [type].arr; + const len = arrays [type].len; + + for (let i = 0; i < len; i++) { + arr[i] = val; + } + + instance[funcname](0, val); + + for (let i = 0; i < len; i++) { + assertEquals(-val, arr[i]); + } +}; + +testNeg("i32x4_neg", 0); +testNeg("i32x4_neg", 1); +testNeg("i32x4_neg", 2147483647); + +testNeg("i16x8_neg", 0); +testNeg("i16x8_neg", 1); +testNeg("i16x8_neg", 32767); + +testNeg("i8x16_neg", 0); +testNeg("i8x16_neg", 1); +testNeg("i8x16_neg", 127); + +testNeg("f32x4_neg", 0.0); +testNeg("f32x4_neg", 1.0); +testNeg("f32x4_neg", 1000.0); +testNeg("f32x4_neg", Number.POSITIVE_INFINITY, 32); + +testNeg("f64x2_neg", -0.0); +testNeg("f64x2_neg", 1.0); +testNeg("f64x2_neg", 1234.56); +testNeg("f64x2_neg", Number.POSITIVE_INFINITY); + +if (passed) { + print("Passed"); +} diff --git a/test/wasm.simd/storeTests.js b/test/wasm.simd/storeTests.js new file mode 100755 index 00000000000..ac01344caee --- /dev/null +++ b/test/wasm.simd/storeTests.js @@ -0,0 +1,69 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//------------------------------------------------------------------------------------------------------- + +let passed = true; + +function assertEquals(expected, actual) { + if (expected != actual) { + passed = false; + throw `Expected ${expected}, received ${actual}`; + } +} + +let check = function(expected, funName, ...args) { + let fun = eval(funName); + var result; + try { + result = fun(...args); + } + catch (e) { + result = e.name; + } + + if(result != expected) + { + passed = false; + print(`${funName}(${[...args]}) produced ${result}, expected ${expected}`); + } +} + +const INITIAL_SIZE = 1; +const module = new WebAssembly.Module(readbuffer('stores.wasm')); + +let memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); +const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports; +let intArray = new Int32Array (memObj.buffer); + +let testStore = function (funcname, ...expected) { + + const index = 0; + + for (let i = 0; i < expected.length; i++) { + intArray[4 + i] = expected[i]; + } + + instance[funcname](index, ...expected); + + for (let i = 0; i < expected.length; i++) + assertEquals(expected[i], intArray[index + i]); +} + + +testStore("m128_store4", 777, 888, 999, 1111); +testStore("m128_store4", -1, 0, 0, -1); +testStore("m128_store4", -1, -1, -1, -1); + + +const MEM_SIZE_IN_BYTES = 1024 * 64; +check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 12, 777, 888, 999, 1111); +check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 8, 777, 888, 999, 1111); +check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 4, 777, 888, 999, 1111); +check("RangeError", "instance.m128_store4_offset", -1, 777, 888, 999, 1111); +check("RangeError", "instance.m128_store4_offset", 0xFFFFFFFC, 777, 888, 999, 1111); +check(undefined, "instance.m128_store4", MEM_SIZE_IN_BYTES - 16, 777, 888, 999, 1111); + +if (passed) { + print("Passed"); +} \ No newline at end of file diff --git a/test/wasm.simd/stores.wasm 
b/test/wasm.simd/stores.wasm new file mode 100755 index 00000000000..1c43ef024f2 Binary files /dev/null and b/test/wasm.simd/stores.wasm differ diff --git a/test/wasm.simd/stores.wast b/test/wasm.simd/stores.wast new file mode 100755 index 00000000000..0f97dab8580 --- /dev/null +++ b/test/wasm.simd/stores.wast @@ -0,0 +1,18 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. +;;------------------------------------------------------------------------------------------------------- + +(module + (import "dummy" "memory" (memory 1)) + + (; m128_store4: loads an m128 from address 16 into local 5, then stores it at the address given by parameter 0. Parameters 1 to 4 are declared but not read by the body. ;) + (func (export "m128_store4") (param i32 i32 i32 i32 i32) (local m128) + (set_local 5 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=0 align=4 (get_local 0) (get_local 5)) + ) + + (; m128_store4_offset: same load as m128_store4, but the store uses offset=16 on top of the address in parameter 0. ;) + (func (export "m128_store4_offset") (param i32 i32 i32 i32 i32) (local m128) + (set_local 5 (m128.load offset=0 align=4 (i32.const 16))) + (m128.store offset=16 align=4 (get_local 0) (get_local 5)) + ) +) diff --git a/test/wasm.simd/truncConvTests.js b/test/wasm.simd/truncConvTests.js new file mode 100755 index 00000000000..1dd5f11d373 --- /dev/null +++ b/test/wasm.simd/truncConvTests.js @@ -0,0 +1,98 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft Corporation and contributors. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//------------------------------------------------------------------------------------------------------- + +let passed = true; + +/* assertEquals(expected, actual): compares with loose inequality (!=); on mismatch it clears the passed flag and throws an object whose message property describes both values. */ +function assertEquals(expected, actual) { + if (expected != actual) { + passed = false; + throw { message : `Expected ${expected}, received ${actual}`}; + } +} + +const INITIAL_SIZE = 1; +const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); +/* Both views are created over the same memObj.buffer, keyed by lane type. */ +const arrays = { + "i32x4" : new Int32Array (memObj.buffer), + "f32x4" : new Float32Array (memObj.buffer) +}; + +const module = new WebAssembly.Module(readbuffer('truncconv.wasm')); +const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports; + +/* testTruncConvTests(funcname, args1, resultArr): the prefix of funcname before '_' names the output view; conversionType gives the other lane type, which names the input view. args1 is written at element 0 of the input view, the export is called with no arguments, resultArr is written into the output view starting at element len, and output elements i and i + len are compared for i in [0, len). Returns true when every comparison passes. The checkThrows calls in this file pass no resultArr, so the second moveArgsIntoArray would itself throw on args.length if the export returned normally. NOTE(review): assertEquals receives the computed element in its expected position and the reference element in its actual position, so a failure message labels them the other way round. */ +let testTruncConvTests = function (funcname, args1, resultArr) { + + const len = args1.length; + const inputIndex = conversionType(funcname.split("_")[0]); + const outputIndex = funcname.split("_")[0]; + + const inputArr = arrays[inputIndex]; + const outputArr = arrays[outputIndex]; + + moveArgsIntoArray(args1, 0, inputArr); + instance[funcname](); + moveArgsIntoArray(resultArr, len, outputArr); //should come after instance[funcname]() for checkThrows to work + + for (let i = 0; i < len; i++) { + assertEquals(outputArr[i], outputArr[i + len]); + } + + return true; //otherwise will throw an exception +} + +/* checkInternal(expected, ...args): runs testTruncConvTests(...args); if it throws, the result is the message property of the exception with the text "SIMD.Int32x4.FromFloat32x4: " removed. A result that differs (!=) from expected clears the passed flag and prints a diagnostic. */ +let checkInternal = function(expected, ...args) { + + var result; + try { + result = testTruncConvTests(...args); + } + catch (e) { + result = e.message.replace("SIMD.Int32x4.FromFloat32x4: ", ""); + } + + if(result != expected) + { + passed = false; + print(`testTruncConvTests(${[...args]}) produced ${result}, expected ${expected}`); + } +} + +/* checkThrows forwards the expected error text and the test arguments to checkInternal unchanged. */ +let checkThrows = function(expected, ...args) { + checkInternal(expected, ...args); +} + +/* check expects testTruncConvTests to return true. */ +let check = function (...args) { + checkInternal(true, ...args); +} + +/* moveArgsIntoArray(args, offset, arr): copies args[i] into arr[offset + i] for every index of args. */ +function moveArgsIntoArray(args, offset, arr) { + for (let i = 0; i < args.length; i++) { + arr[offset + i] = args[i]; + } +} + + +/* conversionType(type): returns "f32x4" when type is "i32x4", otherwise "i32x4". */ +let conversionType = function (type) { + return type == "i32x4" ? 
"f32x4" : "i32x4"; +} + +/* Argument order below: check(export name, input lanes, expected output lanes); checkThrows(expected error text, export name, input lanes). */ +check("i32x4_trunc_s", [2147483520.0, -1, 1.25, -0.25], [2147483520, -1, 1, 0]); +checkThrows("argument out of range", "i32x4_trunc_s", [2147483520.0, -1, Number.NaN, -0.25]); +checkThrows("argument out of range", "i32x4_trunc_s", [2147483520.0, 2147483647.0, 1.25, -0.25]); +checkThrows("argument out of range", "i32x4_trunc_s", [-4294967040.0, -1, 1.25, -0.25]); + +check("i32x4_trunc_u", [4294967040.0, 2147483520.0, 1.25, 0.25], [4294967040, 2147483520, 1, 0]); +checkThrows("argument out of range", "i32x4_trunc_u", [4294967040.0, 2147483520.0, Number.NaN, 0.25]); +checkThrows("argument out of range", "i32x4_trunc_u", [4294967040.0, 4294967296.0, 1.25, 0.25]); +checkThrows("argument out of range", "i32x4_trunc_u", [4294967040.0, 2147483520.0, 1.25, -1]); + +check("f32x4_convert_s", [2147483647, -2147483647, 0, 65535], [2.14748365e+09, -2.14748365e+09, 0, 65535]); +check("f32x4_convert_s", [101, 1003, -10007, -65535], [101, 1003, -10007, -65535]); +check("f32x4_convert_u", [2147483647, 4294967295, 0, 65535], [2.14748365e+09, 4.29496730e+09, 0, 65535]); +check("f32x4_convert_u", [32767, 9999, 100003, 1], [32767, 9999, 100003, 1]); + +if (passed) { + print("Passed"); +} diff --git a/test/wasm.simd/truncconv.wasm b/test/wasm.simd/truncconv.wasm new file mode 100755 index 00000000000..44bea7e30c6 Binary files /dev/null and b/test/wasm.simd/truncconv.wasm differ diff --git a/test/wasm.simd/truncconv.wast b/test/wasm.simd/truncconv.wast new file mode 100644 index 00000000000..3afaa6b076c --- /dev/null +++ b/test/wasm.simd/truncconv.wast @@ -0,0 +1,28 @@ +;;------------------------------------------------------------------------------------------------------- +;; Copyright (C) Microsoft Corporation and contributors. All rights reserved. +;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+;;------------------------------------------------------------------------------------------------------- + +(module + (import "dummy" "memory" (memory 1)) + + (; Each export below takes no parameters: it loads an m128 from address 0 into local $v1, applies the operator named in the export (i32x4.trunc_s, i32x4.trunc_u, f32x4.convert_s or f32x4.convert_u), and stores the result back at address 0. ;) + (func (export "i32x4_trunc_s") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i32x4.trunc_s (get_local $v1))) + ) + + (func (export "i32x4_trunc_u") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (i32x4.trunc_u (get_local $v1))) + ) + + (func (export "f32x4_convert_s") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (f32x4.convert_s (get_local $v1))) + ) + + (func (export "f32x4_convert_u") (local $v1 m128) + (set_local $v1 (m128.load offset=0 align=4 (i32.const 0))) + (m128.store offset=0 (i32.const 0) (f32x4.convert_u (get_local $v1))) + ) +)