diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 25774eea9468f..ef14cf55bbd4c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1075,33 +1075,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCompareInt(GenTree* treeNode); #ifdef FEATURE_SIMD - enum SIMDScalarMoveType{ - SMT_ZeroInitUpper, // zero initlaize target upper bits - SMT_ZeroInitUpper_SrcHasUpperZeros, // zero initialize target upper bits; source upper bits are known to be zero - SMT_PreserveUpper // preserve target upper bits - }; - #ifdef TARGET_ARM64 insOpts genGetSimdInsOpt(emitAttr size, var_types elementType); #endif -#ifdef TARGET_XARCH - instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival = nullptr); -#endif - void genSIMDScalarMove( - var_types targetType, var_types type, regNumber target, regNumber src, SIMDScalarMoveType moveType); - void genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg); - void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode); void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode); void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode); - void genSIMDLo64BitConvert(SIMDIntrinsicID intrinsicID, - var_types simdType, - var_types baseType, - regNumber tmpReg, - regNumber tmpIntReg, - regNumber targetReg); - void genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode); - void genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode); - void genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, regNumber tgtReg); void genSIMDIntrinsic(GenTreeSIMD* simdNode); // TYP_SIMD12 (i.e Vector3 of size 12 bytes) is not a hardware supported size and requires diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8cb9db68b9685..0adfcc5484641 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5044,10 +5044,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) switch (simdNode->GetSIMDIntrinsicId()) { - case SIMDIntrinsicInitN: - genSIMDIntrinsicInitN(simdNode); - break; - case SIMDIntrinsicUpperSave: genSIMDIntrinsicUpperSave(simdNode); break; @@ -5095,76 +5091,6 @@ insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) return result; } -//------------------------------------------------------------------------------------------- -// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes -// a number of arguments equal to the length of the Vector. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) -{ - assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN); - - regNumber targetReg = simdNode->GetRegNum(); - assert(targetReg != REG_NA); - - var_types targetType = simdNode->TypeGet(); - var_types baseType = simdNode->GetSimdBaseType(); - emitAttr baseTypeSize = emitTypeSize(baseType); - regNumber vectorReg = targetReg; - size_t initCount = simdNode->GetOperandCount(); - - assert((initCount * baseTypeSize) <= simdNode->GetSimdSize()); - - if (varTypeIsFloating(baseType)) - { - // Note that we cannot use targetReg before consuming all float source operands. - // Therefore use an internal temp register - vectorReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); - } - - // We will first consume the list items in execution (left to right) order, - // and record the registers. 
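For reference, the sequence the removed arm64 helper emitted was a `movi` to zero the destination followed by one `ins` per element. A minimal portable sketch of that zero-then-insert pattern, modeling the 16-byte register as a byte array (the types and helpers here are illustrative stand-ins, not JIT or emitter APIs):

```cpp
#include <cstring>
#include <cstdio>

struct Vec128
{
    unsigned char bytes[16];
};

// Models "movi vReg.16b, #0": clear the whole destination register.
static void MoviZero(Vec128& v)
{
    std::memset(v.bytes, 0, sizeof(v.bytes));
}

// Models "ins vReg.s[i], src" for 4-byte elements: write one lane.
static void InsLane(Vec128& v, unsigned lane, float value)
{
    std::memcpy(&v.bytes[lane * sizeof(float)], &value, sizeof(float));
}

int main()
{
    // Vector3(1, 2, 3): three 4-byte lanes; the upper lane stays zero,
    // matching the "initCount * baseTypeSize < EA_16BYTE" pre-zeroing.
    Vec128 v;
    MoviZero(v);

    const float args[3] = {1.0f, 2.0f, 3.0f};
    for (unsigned i = 0; i < 3; i++)
    {
        InsLane(v, i, args[i]);
    }

    float lanes[4];
    std::memcpy(lanes, v.bytes, sizeof(lanes));
    std::printf("%g %g %g %g\n", lanes[0], lanes[1], lanes[2], lanes[3]); // 1 2 3 0
    return 0;
}
```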
- regNumber operandRegs[FP_REGSIZE_BYTES]; - for (size_t i = 1; i <= initCount; i++) - { - GenTree* operand = simdNode->Op(i); - assert(operand->TypeIs(baseType)); - assert(!operand->isContained()); - - operandRegs[i - 1] = genConsumeReg(operand); - } - - if (initCount * baseTypeSize < EA_16BYTE) - { - GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, vectorReg, 0x00, INS_OPTS_16B); - } - - if (varTypeIsIntegral(baseType)) - { - for (unsigned i = 0; i < initCount; i++) - { - GetEmitter()->emitIns_R_R_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i); - } - } - else - { - for (unsigned i = 0; i < initCount; i++) - { - GetEmitter()->emitIns_R_R_I_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i, 0); - } - } - - // Load the initialized value. - GetEmitter()->emitIns_Mov(INS_mov, EA_16BYTE, targetReg, vectorReg, /* canSkip */ true); - - genProduceReg(simdNode); -} - //----------------------------------------------------------------------------- // genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to // the given register, if any, or to memory. diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 12ced2641d167..298d17e9e4cbf 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4882,167 +4882,6 @@ insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) return INS_OPTS_NONE; } -// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic -// -// Arguments: -// intrinsicId - SIMD intrinsic Id -// baseType - Base type of the SIMD vector -// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode -// -// -// Return Value: -// Instruction (op) to be used, and immed is set if instruction requires an immediate operand. -// -instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) -{ - NYI("unimplemented on LOONGARCH64 yet"); - return INS_invalid; -} - -//------------------------------------------------------------------------ -// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//------------------------------------------------------------------------------------------- -// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes -// a number of arguments equal to the length of the Vector. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//---------------------------------------------------------------------------------- -// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Notes: -// The Widen intrinsics are broken into separate intrinsics for the two results. 
-// -void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Notes: -// This intrinsic takes two arguments. The first operand is narrowed to produce the -// lower elements of the results, and the second operand produces the high elements. -// -void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations -// add, sub, mul, bit-wise And, AndNot and Or. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator -// == and != -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//------------------------------------------------------------------------------------ -// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//------------------------------------------------------------------------------------ -// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - //----------------------------------------------------------------------------- // genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to // the given register, if any, or to memory. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 033dffc116150..b771b4ceba10c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8574,16 +8574,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return getBaseJitTypeAndSizeOfSIMDType(typeHnd, nullptr); } - // Get SIMD Intrinsic info given the method handle. - // Also sets typeHnd, argCount, baseType and sizeBytes out params. - const SIMDIntrinsicInfo* getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* typeHnd, - CORINFO_METHOD_HANDLE methodHnd, - CORINFO_SIG_INFO* sig, - bool isNewObj, - unsigned* argCount, - CorInfoType* simdBaseJitType, - unsigned* sizeBytes); - // Pops and returns GenTree node from importers type stack. 
// Normalizes TYP_STRUCT value in case of GT_CALL, GT_RET_EXPR and arg nodes. GenTree* impSIMDPopStack(var_types type, bool expectAddr = false, CORINFO_CLASS_HANDLE structType = nullptr); @@ -8593,18 +8583,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool areLocalFieldsContiguous(GenTreeLclFld* first, GenTreeLclFld* second); bool areArrayElementsContiguous(GenTree* op1, GenTree* op2); bool areArgumentsContiguous(GenTree* op1, GenTree* op2); - GenTree* createAddressNodeForSIMDInit(GenTree* tree, unsigned simdSize); - - // check methodHnd to see if it is a SIMD method that is expanded as an intrinsic in the JIT. - GenTree* impSIMDIntrinsic(OPCODE opcode, - GenTree* newobjThis, - CORINFO_CLASS_HANDLE clsHnd, - CORINFO_METHOD_HANDLE method, - CORINFO_SIG_INFO* sig, - unsigned methodFlags, - int memberRef); - - GenTree* getOp1ForConstructor(OPCODE opcode, GenTree* newobjThis, CORINFO_CLASS_HANDLE clsHnd); + GenTree* CreateAddressNodeForSimdHWIntrinsicCreate(GenTree* tree, var_types simdBaseType, unsigned simdSize); // Whether SIMD vector occupies part of SIMD register. // SSE2: vector2f/3f are considered sub register SIMD types. diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index cfeff19efa8fe..2ffcd3ae2e0d4 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -6457,12 +6457,6 @@ bool GenTree::OperIsImplicitIndir() const return true; case GT_INTRINSIC: return AsIntrinsic()->gtIntrinsicName == NI_System_Object_GetType; -#ifdef FEATURE_SIMD - case GT_SIMD: - { - return AsSIMD()->OperIsMemoryLoad(); - } -#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { @@ -18753,18 +18747,6 @@ var_types GenTreeJitIntrinsic::GetSimdBaseType() const return JitType2PreciseVarType(simdBaseJitType); } -//------------------------------------------------------------------------ -// OperIsMemoryLoad: Does this SIMD intrinsic have memory load semantics? -// -// Return Value: -// Whether this intrinsic may throw NullReferenceException if the -// address is "null". 
-// -bool GenTreeSIMD::OperIsMemoryLoad() const -{ - return GetSIMDIntrinsicId() == SIMDIntrinsicInitArray; -} - /* static */ bool GenTreeSIMD::Equals(GenTreeSIMD* op1, GenTreeSIMD* op2) { return (op1->TypeGet() == op2->TypeGet()) && (op1->GetSIMDIntrinsicId() == op2->GetSIMDIntrinsicId()) && diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 6c29054b8ddc2..a7cd6c5600ecb 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6330,8 +6330,6 @@ struct GenTreeSIMD : public GenTreeJitIntrinsic } #endif - bool OperIsMemoryLoad() const; - SIMDIntrinsicID GetSIMDIntrinsicId() const { return gtSIMDIntrinsicID; @@ -8701,20 +8699,6 @@ inline bool GenTree::IsVectorCreate() const } #endif // FEATURE_HW_INTRINSICS -#ifdef FEATURE_SIMD - if (OperIs(GT_SIMD)) - { - switch (AsSIMD()->GetSIMDIntrinsicId()) - { - case SIMDIntrinsicInitN: - return true; - - default: - return false; - } - } -#endif // FEATURE_SIMD - return false; } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index fca2ff56aae8a..78357b2918cb5 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -686,12 +686,40 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs); + // TODO-CQ: We don't handle contiguous args for anything except TYP_FLOAT today + + GenTree* prevArg = nullptr; + bool areArgsContiguous = (simdBaseType == TYP_FLOAT); + for (int i = sig->numArgs - 1; i >= 0; i--) { - nodeBuilder.AddOperand(i, impPopStack().val); + GenTree* arg = impPopStack().val; + + if (areArgsContiguous) + { + if (prevArg != nullptr) + { + // Recall that we are popping the args off the stack in reverse order. 
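The adjacency test behind `areArgumentsContiguous` reduces to comparing byte offsets: two float args are contiguous when the second sits exactly `sizeof(float)` past the first, and popping in reverse order pairs each arg with the one that follows it in source order. A minimal self-contained sketch of that walk (the `FieldRef` type is a hypothetical stand-in for a field's offset, not JIT IR):

```cpp
#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for a float field: just its byte offset within a struct.
struct FieldRef
{
    size_t offset;
};

// Two float fields are "contiguous" when the second starts right after the first.
static bool AreContiguous(const FieldRef& first, const FieldRef& second)
{
    return second.offset == first.offset + sizeof(float);
}

int main()
{
    // Vector3(v.x, v.y, v.z) where x/y/z sit at offsets 0, 4, 8.
    FieldRef args[3] = {{0}, {4}, {8}};

    // Mirror the importer loop: walk from the last arg back to the first,
    // comparing each arg against the one popped just before it.
    bool areArgsContiguous = true;
    for (int i = 2; i > 0; i--)
    {
        areArgsContiguous &= AreContiguous(args[i - 1], args[i]);
    }

    std::printf("%s\n", areArgsContiguous ? "contiguous" : "not contiguous");
    return 0;
}
```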
+ areArgsContiguous = areArgumentsContiguous(arg, prevArg); + } + + prevArg = arg; + } + + nodeBuilder.AddOperand(i, arg); } - retNode = gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); + if (areArgsContiguous) + { + op1 = nodeBuilder.GetOperand(0); + GenTree* op1Address = CreateAddressNodeForSimdHWIntrinsicCreate(op1, simdBaseType, simdSize); + retNode = gtNewOperNode(GT_IND, retType, op1Address); + } + else + { + retNode = + gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); + } break; } @@ -736,8 +764,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsFloating(simdBaseType)) { - op2 = impSIMDPopStack(retType); - op1 = impSIMDPopStack(retType); + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 69474169db631..5d3f50cb5cb89 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -620,11 +620,23 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, regNumber op1Reg = op1->GetRegNum(); - if ((ins == INS_insertps) && (op1Reg == REG_NA)) + if (ins == INS_insertps) { - // insertps is special and can contain op1 when it is zero - assert(op1->isContained() && op1->IsVectorZero()); - op1Reg = targetReg; + if (op1Reg == REG_NA) + { + // insertps is special and can contain op1 when it is zero + assert(op1->isContained() && op1->IsVectorZero()); + op1Reg = targetReg; + } + + if (op2->isContained() && op2->IsVectorZero()) + { + // insertps can also contain op2 when it is zero in which case + // we just reuse op1Reg since ival specifies the entry to zero + + emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op1Reg, ival); + return; + } } assert(targetReg != REG_NA); diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 40ef0bc8078ad..03c59de09a0e0 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1058,12 +1058,40 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs); + // TODO-CQ: We don't handle contiguous args for anything except TYP_FLOAT today + + GenTree* prevArg = nullptr; + bool areArgsContiguous = (simdBaseType == TYP_FLOAT); + for (int i = sig->numArgs - 1; i >= 0; i--) { - nodeBuilder.AddOperand(i, impPopStack().val); + GenTree* arg = impPopStack().val; + + if (areArgsContiguous) + { + if (prevArg != nullptr) + { + // Recall that we are popping the args off the stack in reverse order. 
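The payoff of the contiguity check, shown for arm64 earlier in this hunk and repeated for xarch below, is replacing the element-wise Create with one wide load off the first argument's address. A rough model of the transformation's effect in plain C++ (assuming a 12-byte Vector3 laid out as three consecutive floats):

```cpp
#include <cstring>
#include <cstdio>

struct Vector3
{
    float x, y, z;
};

struct Source
{
    float x, y, z; // contiguous fields, like the field sequence checked above
};

int main()
{
    Source s = {1.0f, 2.0f, 3.0f};

    // Element-wise form: one insert per argument (what the HWIntrinsic node expands to).
    Vector3 slow = {s.x, s.y, s.z};

    // Contiguous form: a single 12-byte load from the first field's address,
    // which is what GT_IND(CreateAddressNodeForSimdHWIntrinsicCreate(...)) models.
    Vector3 fast;
    std::memcpy(&fast, &s.x, sizeof(fast));

    std::printf("%g %g %g / %g %g %g\n", slow.x, slow.y, slow.z, fast.x, fast.y, fast.z);
    return 0;
}
```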
+ areArgsContiguous = areArgumentsContiguous(arg, prevArg); + } + + prevArg = arg; + } + + nodeBuilder.AddOperand(i, arg); } - retNode = gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); + if (areArgsContiguous) + { + op1 = nodeBuilder.GetOperand(0); + GenTree* op1Address = CreateAddressNodeForSimdHWIntrinsicCreate(op1, simdBaseType, simdSize); + retNode = gtNewOperNode(GT_IND, retType, op1Address); + } + else + { + retNode = + gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); + } break; } @@ -1116,11 +1144,20 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsFloating(simdBaseType)) { - op2 = impSIMDPopStack(retType); - op1 = impSIMDPopStack(retType); + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); + break; } break; } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 211420188476c..97e4d33e8e414 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -265,18 +265,6 @@ var_types Compiler::impImportCall(OPCODE opcode, } } -#ifdef FEATURE_SIMD - if (isIntrinsic) - { - call = impSIMDIntrinsic(opcode, newobjThis, clsHnd, methHnd, sig, mflags, pResolvedToken->token); - if (call != nullptr) - { - bIntrinsicImported = true; - goto DONE_CALL; - } - } -#endif // FEATURE_SIMD - if ((mflags & CORINFO_FLG_VIRTUAL) && (mflags & CORINFO_FLG_EnC) && (opcode == CEE_CALLVIRT)) { NO_WAY("Virtual call to a function added via EnC is not supported"); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 916eb11028e43..a5bd33398128b 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1479,7 +1479,7 @@ bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* } else { - GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); + GenTree* copyBlkDst = CreateAddressNodeForSimdHWIntrinsicCreate(originalLHS, TYP_FLOAT, simdSize); dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst); } diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 20fd0a6216574..e1f0a9d02658e 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -280,19 +280,6 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree) fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed); break; -#ifdef FEATURE_SIMD - case GT_SIMD: - { - GenTreeSIMD* simdNode = tree->AsSIMD(); - if (simdNode->OperIsMemoryLoad()) - { - // This instruction loads from memory and we need to record this information - fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed); - } - break; - } -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 4f7d2fb3ac73b..9a2180983bc0a 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -384,12 +384,6 @@ GenTree* Lowering::LowerNode(GenTree* node) break; #endif 
// TARGET_XARCH
 
-#ifdef FEATURE_SIMD
-        case GT_SIMD:
-            LowerSIMD(node->AsSIMD());
-            break;
-#endif // FEATURE_SIMD
-
 #ifdef FEATURE_HW_INTRINSICS
         case GT_HWINTRINSIC:
             return LowerHWIntrinsic(node->AsHWIntrinsic());
@@ -7056,11 +7050,6 @@ void Lowering::ContainCheckNode(GenTree* node)
             ContainCheckIntrinsic(node->AsOp());
             break;
 #endif // TARGET_XARCH
-#ifdef FEATURE_SIMD
-        case GT_SIMD:
-            ContainCheckSIMD(node->AsSIMD());
-            break;
-#endif // FEATURE_SIMD
 #ifdef FEATURE_HW_INTRINSICS
         case GT_HWINTRINSIC:
             ContainCheckHWIntrinsic(node->AsHWIntrinsic());
@@ -7567,74 +7556,6 @@ void Lowering::TryRetypingFloatingPointStoreToIntegerStore(GenTree* store)
     }
 }
 
-#ifdef FEATURE_SIMD
-//----------------------------------------------------------------------------------------------
-// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node.
-//
-//  Arguments:
-//     simdNode - The SIMD intrinsic node.
-//
-void Lowering::LowerSIMD(GenTreeSIMD* simdNode)
-{
-    if (simdNode->TypeGet() == TYP_SIMD12)
-    {
-        // GT_SIMD node requiring to produce TYP_SIMD12 in fact
-        // produces a TYP_SIMD16 result
-        simdNode->gtType = TYP_SIMD16;
-    }
-
-    if (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN)
-    {
-        assert(simdNode->GetSimdBaseType() == TYP_FLOAT);
-
-        size_t argCount      = simdNode->GetOperandCount();
-        size_t constArgCount = 0;
-        float  constArgValues[4]{0, 0, 0, 0};
-
-        for (GenTree* arg : simdNode->Operands())
-        {
-            assert(arg->TypeIs(simdNode->GetSimdBaseType()));
-
-            if (arg->IsCnsFltOrDbl())
-            {
-                noway_assert(constArgCount < ArrLen(constArgValues));
-                constArgValues[constArgCount] = static_cast<float>(arg->AsDblCon()->DconValue());
-                constArgCount++;
-            }
-        }
-
-        if (constArgCount == argCount)
-        {
-            for (GenTree* arg : simdNode->Operands())
-            {
-                BlockRange().Remove(arg);
-            }
-
-            // For SIMD12, even though there might be 12 bytes of constants, we need to store 16 bytes of data
-            // since we've bashed the node to TYP_SIMD16 and do a 16-byte indirection.
-            assert(varTypeIsSIMD(simdNode));
-            const unsigned cnsSize = genTypeSize(simdNode);
-            assert(cnsSize <= sizeof(constArgValues));
-
-            const unsigned cnsAlign =
-                (comp->compCodeOpt() != Compiler::SMALL_CODE) ? 
cnsSize : emitter::dataSection::MIN_DATA_ALIGN; - - CORINFO_FIELD_HANDLE hnd = - comp->GetEmitter()->emitBlkConst(constArgValues, cnsSize, cnsAlign, simdNode->GetSimdBaseType()); - GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(TYP_I_IMPL, hnd); - BlockRange().InsertBefore(simdNode, clsVarAddr); - simdNode->ChangeOper(GT_IND); - simdNode->AsOp()->gtOp1 = clsVarAddr; - ContainCheckIndir(simdNode->AsIndir()); - - return; - } - } - - ContainCheckSIMD(simdNode); -} -#endif // FEATURE_SIMD - #if defined(FEATURE_HW_INTRINSICS) //---------------------------------------------------------------------------------------------- // Lowering::InsertNewSimdCreateScalarUnsafeNode: Inserts a new simd CreateScalarUnsafe node diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 41d3265452c67..43e4d2fd24055 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -107,9 +107,6 @@ class Lowering final : public Phase void ContainCheckFloatBinary(GenTreeOp* node); void ContainCheckIntrinsic(GenTreeOp* node); #endif // TARGET_XARCH -#ifdef FEATURE_SIMD - void ContainCheckSIMD(GenTreeSIMD* simdNode); -#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS void ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* addr); void ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node); @@ -346,9 +343,6 @@ class Lowering final : public Phase GenTree* LowerArrElem(GenTreeArrElem* arrElem); void LowerRotate(GenTree* tree); void LowerShift(GenTreeOp* shift); -#ifdef FEATURE_SIMD - void LowerSIMD(GenTreeSIMD* simdNode); -#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node); void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 33702d4bb33f0..6fd900f8a1032 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2496,28 +2496,6 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) } } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. -// -void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) -{ - switch (simdNode->GetSIMDIntrinsicId()) - { - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - CheckImmedAndMakeContained(simdNode, simdNode->Op(2)); - break; - - default: - break; - } -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 06e68fc324bc2..3cbdf8715b737 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -503,19 +503,6 @@ void Lowering::LowerRotate(GenTree* tree) ContainCheckShiftRotate(tree->AsOp()); } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. 
-// -void Lowering::LowerSIMD(GenTreeSIMD* simdNode) -{ - NYI_LOONGARCH64("LowerSIMD"); -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node. @@ -813,19 +800,6 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) } } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. -// -void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) -{ - NYI_LOONGARCH64("ContainCheckSIMD"); -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- // ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node. diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 5150266e972f5..9bb98072a2b13 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1125,7 +1125,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } node->ChangeHWIntrinsicId(NI_Vector128_GetElement); - LowerNode(node); + return LowerNode(node); } break; } @@ -1136,11 +1136,212 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { assert(node->GetOperandCount() == 3); + var_types simdBaseType = node->GetSimdBaseType(); + // Insert takes either a 32-bit register or a memory operand. // In either case, only SimdBaseType bits are read and so // widening or narrowing the operand may be unnecessary and it // can just be used directly. - node->Op(2) = TryRemoveCastIfPresent(node->GetSimdBaseType(), node->Op(2)); + + node->Op(2) = TryRemoveCastIfPresent(simdBaseType, node->Op(2)); + + if (simdBaseType != TYP_FLOAT) + { + break; + } + assert(intrinsicId == NI_SSE41_Insert); + + // We have Sse41.Insert in which case we can specially handle + // a couple of interesting scenarios involving chains of Inserts + // where one of them involves inserting zero + // + // Given Sse41.Insert has an index: + // * Bits 0-3: zmask + // * Bits 4-5: count_d + // * Bits 6-7: count_s (register form only) + // + // Where zmask specifies which elements to zero + // Where count_d specifies the destination index the value is being inserted to + // Where count_s specifies the source index of the value being inserted + // + // We can recognize `Insert(Insert(vector, zero, index1), value, index2)` and + // transform it into just `Insert(vector, value, index)`. This is because we + // can remove the inner insert and update the relevant index fields. + // + // We can likewise recognize `Insert(Insert(vector, value, index1), zero, index2)` + // and do a similar transformation. + + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); + + bool op1IsVectorZero = op1->IsVectorZero(); + bool op2IsVectorZero = op2->IsVectorZero(); + + if (op1IsVectorZero && op2IsVectorZero) + { + // While this case is unlikely, we'll handle it here to simplify some + // of the logic that exists below. Effectively `Insert(zero, zero, idx)` + // is always going to produce zero, so we'll just replace ourselves with + // zero. This ensures we don't need to handle a case where op2 is zero + // but not contained. 
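The immediate decoded below packs three fields into one byte: bits 6-7 select the source lane (count_s), bits 4-5 the destination lane (count_d), and bits 0-3 the zero mask. A small self-checking example of encoding and decoding that byte, following Intel's insertps layout:

```cpp
#include <cassert>
#include <cstdio>

// Pack count_s (source lane), count_d (destination lane) and zmask into
// the 8-bit insertps immediate: ss'dd'zzzz.
static unsigned EncodeInsertpsImm(unsigned count_s, unsigned count_d, unsigned zmask)
{
    return ((count_s & 0x3) << 6) | ((count_d & 0x3) << 4) | (zmask & 0xF);
}

int main()
{
    // Insert source lane 1 into destination lane 2, zeroing lane 0.
    unsigned ival = EncodeInsertpsImm(1, 2, 0b0001);
    assert(ival == 0x61); // 0b0110'0001

    // Decode it back, matching the lowering code that follows.
    unsigned zmask   = (ival & 0x0F);
    unsigned count_d = (ival & 0x30) >> 4;
    unsigned count_s = (ival & 0xC0) >> 6;

    assert((count_s == 1) && (count_d == 2) && (zmask == 0b0001));
    std::printf("ival=0x%02X count_s=%u count_d=%u zmask=0x%X\n", ival, count_s, count_d, zmask);
    return 0;
}
```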
+
+                GenTree* nextNode = node->gtNext;
+
+                LIR::Use use;
+
+                if (BlockRange().TryGetUse(node, &use))
+                {
+                    use.ReplaceWith(op1);
+                }
+                else
+                {
+                    op1->SetUnusedValue();
+                }
+
+                BlockRange().Remove(op2);
+                op3->SetUnusedValue();
+                BlockRange().Remove(node);
+
+                return nextNode;
+            }
+
+            if (!op3->IsCnsIntOrI())
+            {
+                // Nothing to do if op3 isn't a constant
+                break;
+            }
+
+            ssize_t ival = op3->AsIntConCommon()->IconValue();
+
+            ssize_t zmask   = (ival & 0x0F);
+            ssize_t count_d = (ival & 0x30) >> 4;
+            ssize_t count_s = (ival & 0xC0) >> 6;
+
+            if (op1IsVectorZero)
+            {
+                // When op1 is zero, we can modify the mask to zero
+                // everything except for the element we're inserting
+
+                zmask |= ~(ssize_t(1) << count_d);
+                zmask &= 0x0F;
+
+                ival = (count_s << 6) | (count_d << 4) | (zmask);
+                op3->AsIntConCommon()->SetIconValue(ival);
+            }
+            else if (op2IsVectorZero)
+            {
+                // When op2 is zero, we can modify the mask to
+                // directly zero the element we're inserting
+
+                zmask |= (ssize_t(1) << count_d);
+                zmask &= 0x0F;
+
+                ival = (count_s << 6) | (count_d << 4) | (zmask);
+                op3->AsIntConCommon()->SetIconValue(ival);
+            }
+
+            if (zmask == 0x0F)
+            {
+                // This is another unlikely case, we'll handle it here to simplify some
+                // of the logic that exists below. In this case, the zmask says all entries
+                // should be zeroed out, so we'll just replace ourselves with zero.
+
+                GenTree* nextNode = node->gtNext;
+
+                LIR::Use use;
+
+                if (BlockRange().TryGetUse(node, &use))
+                {
+                    GenTree* zeroNode = comp->gtNewZeroConNode(TYP_SIMD16);
+                    BlockRange().InsertBefore(node, zeroNode);
+                    use.ReplaceWith(zeroNode);
+                }
+                else
+                {
+                    // We're an unused zero constant node, so don't bother creating
+                    // a new node for something that will never be consumed
+                }
+
+                op1->SetUnusedValue();
+                op2->SetUnusedValue();
+                op3->SetUnusedValue();
+                BlockRange().Remove(node);
+
+                return nextNode;
+            }
+
+            if (!op1->OperIsHWIntrinsic())
+            {
+                // Nothing to do if op1 isn't an intrinsic
+                break;
+            }
+
+            GenTreeHWIntrinsic* op1Intrinsic = op1->AsHWIntrinsic();
+
+            if ((op1Intrinsic->GetHWIntrinsicId() != NI_SSE41_Insert) || (op1Intrinsic->GetSimdBaseType() != TYP_FLOAT))
+            {
+                // Nothing to do if op1 isn't a float32 Sse41.Insert
+                break;
+            }
+
+            GenTree* op1Idx = op1Intrinsic->Op(3);
+
+            if (!op1Idx->IsCnsIntOrI())
+            {
+                // Nothing to do if op1's index isn't a constant
+                break;
+            }
+
+            if (!IsSafeToContainMem(node, op1))
+            {
+                // What we're doing here is effectively similar to containment,
+                // except that we're deleting the node entirely, so there is
+                // nothing we can do if there are side effects between node and op1
+                break;
+            }
+
+            if (op1Intrinsic->Op(2)->IsVectorZero())
+            {
+                // First build up the new index by updating zmask to include
+                // the zmask from op1. We expect that op2 has already been
+                // lowered and therefore the containment checks have happened
+
+                assert(op1Intrinsic->Op(2)->isContained());
+
+                ssize_t op1Ival = op1Idx->AsIntConCommon()->IconValue();
+                ival |= (op1Ival & 0x0F);
+                op3->AsIntConCommon()->SetIconValue(ival);
+
+                // Then we'll just carry the original non-zero input and
+                // remove the now unused constant nodes
+
+                node->Op(1) = op1Intrinsic->Op(1);
+
+                BlockRange().Remove(op1Intrinsic->Op(2));
+                BlockRange().Remove(op1Intrinsic->Op(3));
+                BlockRange().Remove(op1Intrinsic);
+            }
+            else if (op2IsVectorZero)
+            {
+                // Since we've already updated zmask to take op2 being zero into
+                // account, we can basically do the same thing here by merging this
+                // zmask into the ival from op1. 
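Both mask updates above are easy to verify by hand: with op1 zero, zmask gains the complement of bit count_d (zero everything except the lane being written); with op2 zero, it gains bit count_d itself (zero exactly that lane). A self-checking sketch of the arithmetic:

```cpp
#include <cassert>

int main()
{
    using ssize = long long;

    ssize count_d = 2; // inserting into lane 2
    ssize zmask   = 0; // no lanes zeroed initially

    // op1 is zero: zero every lane except count_d.
    ssize zmaskOp1Zero = (zmask | ~(ssize(1) << count_d)) & 0x0F;
    assert(zmaskOp1Zero == 0b1011); // lanes 0, 1 and 3 zeroed, lane 2 written

    // op2 is zero: the "insert" just zeroes lane count_d itself.
    ssize zmaskOp2Zero = (zmask | (ssize(1) << count_d)) & 0x0F;
    assert(zmaskOp2Zero == 0b0100); // only lane 2 zeroed

    // If a merged mask ends up 0x0F, every lane is zeroed and the whole
    // node folds to a zero vector, as handled earlier in the hunk.
    assert(((zmaskOp1Zero | zmaskOp2Zero) & 0x0F) == 0x0F);
    return 0;
}
```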
+ + ssize_t op1Ival = op1Idx->AsIntConCommon()->IconValue(); + ival = op1Ival | zmask; + op3->AsIntConCommon()->SetIconValue(ival); + + // Then we'll just carry the inputs from op1 and remove the now + // unused constant nodes + + node->Op(1) = op1Intrinsic->Op(1); + node->Op(2) = op1Intrinsic->Op(2); + + BlockRange().Remove(op2); + BlockRange().Remove(op1Intrinsic->Op(3)); + BlockRange().Remove(op1Intrinsic); + } break; } @@ -3326,17 +3527,23 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) } assert(result->GetHWIntrinsicId() != intrinsicId); + GenTree* nextNode = LowerNode(result); - LowerNode(result); if (intrinsicId == NI_Vector256_WithElement) { // Now that we have finalized the shape of the tree, lower the insertion node as well. + assert(node->GetHWIntrinsicId() == NI_AVX_InsertVector128); assert(node != result); - LowerNode(node); + + nextNode = LowerNode(node); + } + else + { + assert(node == result); } - return node->gtNext; + return nextNode; } //---------------------------------------------------------------------------------------------- @@ -5934,28 +6141,6 @@ void Lowering::ContainCheckIntrinsic(GenTreeOp* node) } } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. -// -void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) -{ - switch (simdNode->GetSIMDIntrinsicId()) - { - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - CheckImmedAndMakeContained(simdNode, simdNode->Op(2)); - break; - - default: - break; - } -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- // IsContainableHWIntrinsicOp: Determines whether a child node is containable for a given HWIntrinsic @@ -7145,24 +7330,47 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // Where count_d specifies the destination index the value is being inserted to // Where count_s specifies the source index of the value being inserted - ssize_t ival = lastOp->AsIntConCommon()->IconValue(); - - ssize_t zmask = (ival & 0x0F); - ssize_t count_d = (ival & 0x30) >> 4; - ssize_t count_s = (ival & 0xC0) >> 6; - if (op1->IsVectorZero()) { - // When op1 is zero, we can contain op1 and modify the mask - // to zero everything except for the element we're inserting to +// When op1 is zero, we can contain it and we expect that +// ival is already in the correct state to account for it + +#if DEBUG + ssize_t ival = lastOp->AsIntConCommon()->IconValue(); + + ssize_t zmask = (ival & 0x0F); + ssize_t count_d = (ival & 0x30) >> 4; + ssize_t count_s = (ival & 0xC0) >> 6; + + zmask |= ~(ssize_t(1) << count_d); + zmask &= 0x0F; + + ssize_t expected = (count_s << 6) | (count_d << 4) | (zmask); + assert(ival == expected); +#endif MakeSrcContained(node, op1); + } + else if (op2->IsVectorZero()) + { +// When op2 is zero, we can contain it and we expect that +// zmask is already in the correct state to account for it + +#if DEBUG + ssize_t ival = lastOp->AsIntConCommon()->IconValue(); + + ssize_t zmask = (ival & 0x0F); + ssize_t count_d = (ival & 0x30) >> 4; + ssize_t count_s = (ival & 0xC0) >> 6; - zmask |= ~(1 << count_d); + zmask |= (ssize_t(1) << count_d); zmask &= 0x0F; - ival = (count_s << 6) | (count_d << 4) | (zmask); - 
lastOp->AsIntConCommon()->SetIconValue(ival);
+                    ssize_t expected = (count_s << 6) | (count_d << 4) | (zmask);
+                    assert(ival == expected);
+#endif
+
+                    MakeSrcContained(node, op2);
                 }
             }
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h
index e30736a63fd5a..195d1e6523710 100644
--- a/src/coreclr/jit/lsra.h
+++ b/src/coreclr/jit/lsra.h
@@ -1916,10 +1916,6 @@ class LinearScan : public LinearScanInterface
     }
 #endif // TARGET_X86
 
-#ifdef FEATURE_SIMD
-    int BuildSIMD(GenTreeSIMD* tree);
-#endif // FEATURE_SIMD
-
 #ifdef FEATURE_HW_INTRINSICS
     int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount);
 #endif // FEATURE_HW_INTRINSICS
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index c816d33834032..8f5d24ba31304 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -380,12 +380,6 @@ int LinearScan::BuildNode(GenTree* tree)
         }
         break;
 
-#ifdef FEATURE_SIMD
-        case GT_SIMD:
-            srcCount = BuildSIMD(tree->AsSIMD());
-            break;
-#endif // FEATURE_SIMD
-
 #ifdef FEATURE_HW_INTRINSICS
         case GT_HWINTRINSIC:
             srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount);
@@ -797,91 +791,6 @@ int LinearScan::BuildNode(GenTree* tree)
     return srcCount;
 }
 
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
-//
-// Arguments:
-//    tree       - The GT_SIMD node of interest
-//
-// Return Value:
-//    The number of sources consumed by this node.
-//
-int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
-{
-    int srcCount = 0;
-    assert(!simdTree->isContained());
-    int dstCount = simdTree->IsValue() ? 1 : 0;
-    assert(dstCount == 1);
-
-    bool buildUses = true;
-
-    switch (simdTree->GetSIMDIntrinsicId())
-    {
-        case SIMDIntrinsicInitN:
-        {
-            var_types baseType = simdTree->GetSimdBaseType();
-            srcCount           = (short)(simdTree->GetSimdSize() / genTypeSize(baseType));
-            assert(simdTree->GetOperandCount() == static_cast<size_t>(srcCount));
-            if (varTypeIsFloating(simdTree->GetSimdBaseType()))
-            {
-                // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
-                buildInternalFloatRegisterDefForNode(simdTree);
-            }
-
-            for (GenTree* operand : simdTree->Operands())
-            {
-                assert(operand->TypeIs(baseType));
-                assert(!operand->isContained());
-
-                BuildUse(operand);
-            }
-
-            buildUses = false;
-            break;
-        }
-
-        case SIMDIntrinsicInitArray:
-            // We have an array and an index, which may be contained. 
-            break;
-
-        case SIMDIntrinsicInitArrayX:
-        case SIMDIntrinsicInitFixed:
-        case SIMDIntrinsicCopyToArray:
-        case SIMDIntrinsicCopyToArrayX:
-        case SIMDIntrinsicNone:
-        case SIMDIntrinsicInvalid:
-            assert(!"These intrinsics should not be seen during register allocation");
-            FALLTHROUGH;
-
-        default:
-            noway_assert(!"Unimplemented SIMD node type.");
-            unreached();
-    }
-    if (buildUses)
-    {
-        assert(srcCount == 0);
-        srcCount = BuildOperandUses(simdTree->Op(1));
-
-        if ((simdTree->GetOperandCount() == 2) && !simdTree->Op(2)->isContained())
-        {
-            srcCount += BuildOperandUses(simdTree->Op(2));
-        }
-    }
-    assert(internalCount <= MaxInternalCount);
-    buildInternalRegisterUses();
-    if (dstCount == 1)
-    {
-        BuildDef(simdTree);
-    }
-    else
-    {
-        assert(dstCount == 0);
-    }
-    return srcCount;
-}
-#endif // FEATURE_SIMD
-
 #ifdef FEATURE_HW_INTRINSICS
 #include "hwintrinsic.h"
 
diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp
index 0d5a20d6f3dfc..0611b1d5e162d 100644
--- a/src/coreclr/jit/lsraloongarch64.cpp
+++ b/src/coreclr/jit/lsraloongarch64.cpp
@@ -327,12 +327,6 @@ int LinearScan::BuildNode(GenTree* tree)
         }
         break;
 
-#ifdef FEATURE_SIMD
-    case GT_SIMD:
-        srcCount = BuildSIMD(tree->AsSIMD());
-        break;
-#endif // FEATURE_SIMD
-
 #ifdef FEATURE_HW_INTRINSICS
     case GT_HWINTRINSIC:
         srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount);
@@ -642,23 +636,6 @@ int LinearScan::BuildNode(GenTree* tree)
     return srcCount;
 }
 
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
-//
-// Arguments:
-//    tree       - The GT_SIMD node of interest
-//
-// Return Value:
-//    The number of sources consumed by this node.
-//
-int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
-{
-    NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----");
-    return 0;
-}
-#endif // FEATURE_SIMD
-
 #ifdef FEATURE_HW_INTRINSICS
 #include "hwintrinsic.h"
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index 682fcec159ff6..29205bd4b98c5 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -343,12 +343,6 @@ int LinearScan::BuildNode(GenTree* tree)
             srcCount = BuildIntrinsic(tree->AsOp());
             break;
 
-#ifdef FEATURE_SIMD
-        case GT_SIMD:
-            srcCount = BuildSIMD(tree->AsSIMD());
-            break;
-#endif // FEATURE_SIMD
-
 #ifdef FEATURE_HW_INTRINSICS
         case GT_HWINTRINSIC:
             srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount);
@@ -1905,74 +1899,42 @@ int LinearScan::BuildIntrinsic(GenTree* tree)
     return srcCount;
 }
 
-#ifdef FEATURE_SIMD
+#ifdef FEATURE_HW_INTRINSICS
 //------------------------------------------------------------------------
-// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
+// SkipContainedCreateScalarUnsafe: Skips a contained CreateScalarUnsafe node
+// and gets the underlying op1 instead
 //
 // Arguments:
-//    tree       - The GT_SIMD node of interest
+//    node - The node to handle
 //
 // Return Value:
-//    The number of sources consumed by this node.
-//
-int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
+//    If node is a contained CreateScalarUnsafe, its op1 is returned;
+//    otherwise node is returned unchanged. 
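In isolation, the skip-a-transparent-wrapper pattern that this helper implements looks like the following minimal sketch (a toy node type, not actual GenTree IR):

```cpp
#include <cstdio>

// Toy stand-in for a GenTree node: a value that may be wrapped by a
// "transparent" wrapper which register allocation wants to see through.
struct Node
{
    int   value;
    bool  isTransparentWrapper;
    Node* op1;
};

// Mirror of the skip helper: unwrap only when the node is the transparent kind.
static Node* SkipWrapper(Node* node)
{
    return (node->isTransparentWrapper && (node->op1 != nullptr)) ? node->op1 : node;
}

int main()
{
    Node scalar  = {42, false, nullptr};
    Node wrapper = {0, true, &scalar};

    // Register preferencing should track the scalar, not the wrapper.
    std::printf("%d %d\n", SkipWrapper(&wrapper)->value, SkipWrapper(&scalar)->value); // 42 42
    return 0;
}
```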
+static GenTree* SkipContainedCreateScalarUnsafe(GenTree* node)
 {
-    // All intrinsics have a dstCount of 1
-    assert(simdTree->IsValue());
-
-    bool buildUses = true;
-    regMaskTP dstCandidates = RBM_NONE;
+    if (!node->OperIsHWIntrinsic() || !node->isContained())
+    {
+        return node;
+    }
 
-    assert(!simdTree->isContained());
-    SetContainsAVXFlags(simdTree->GetSimdSize());
-    int srcCount = 0;
+    GenTreeHWIntrinsic* hwintrinsic = node->AsHWIntrinsic();
+    NamedIntrinsic      intrinsicId = hwintrinsic->GetHWIntrinsicId();
 
-    switch (simdTree->GetSIMDIntrinsicId())
+    switch (intrinsicId)
     {
-        case SIMDIntrinsicInitN:
+        case NI_Vector128_CreateScalarUnsafe:
+        case NI_Vector256_CreateScalarUnsafe:
         {
-            var_types baseType = simdTree->GetSimdBaseType();
-            srcCount           = (short)(simdTree->GetSimdSize() / genTypeSize(baseType));
-            assert(simdTree->GetOperandCount() == static_cast<size_t>(srcCount));
-
-            // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
-            buildInternalFloatRegisterDefForNode(simdTree);
-
-            for (GenTree* operand : simdTree->Operands())
-            {
-                assert(operand->TypeIs(baseType));
-                assert(!operand->isContained());
-
-                BuildUse(operand);
-            }
-
-            buildUses = false;
+            return hwintrinsic->Op(1);
         }
-        break;
-
-        case SIMDIntrinsicInitArray:
-            // We have an array and an index, which may be contained.
-            break;
 
         default:
-            noway_assert(!"Unimplemented SIMD node type.");
-            unreached();
-    }
-    if (buildUses)
-    {
-        assert(srcCount == 0);
-        // This is overly conservative, but is here for zero diffs.
-        GenTree* op1 = simdTree->Op(1);
-        GenTree* op2 = (simdTree->GetOperandCount() == 2) ? simdTree->Op(2) : nullptr;
-        srcCount = BuildRMWUses(simdTree, op1, op2);
+        {
+            return node;
+        }
     }
-    buildInternalRegisterUses();
-    BuildDef(simdTree, dstCandidates);
-    return srcCount;
 }
-#endif // FEATURE_SIMD
 
-#ifdef FEATURE_HW_INTRINSICS
 //------------------------------------------------------------------------
 // BuildHWIntrinsic: Set the NodeInfo for a GT_HWINTRINSIC tree.
 //
@@ -2011,10 +1973,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
     }
     else
     {
-        GenTree* op1    = intrinsicTree->Op(1);
-        GenTree* op2    = (numArgs >= 2) ? intrinsicTree->Op(2) : nullptr;
-        GenTree* op3    = (numArgs >= 3) ? intrinsicTree->Op(3) : nullptr;
-        GenTree* lastOp = intrinsicTree->Op(numArgs);
+        // A contained CreateScalarUnsafe is special in that we're not containing it to load from
+        // memory and it isn't a constant. Instead, it's essentially a "transparent" node we're ignoring
+        // to simplify the overall IR handling. As such, we need to "skip" such nodes when present and
+        // get the underlying op1 so that delayFreeUse and other preferencing remain correct.
+
+        GenTree* op1    = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(1));
+        GenTree* op2    = (numArgs >= 2) ? SkipContainedCreateScalarUnsafe(intrinsicTree->Op(2)) : nullptr;
+        GenTree* op3    = (numArgs >= 3) ? 
SkipContainedCreateScalarUnsafe(intrinsicTree->Op(3)) : nullptr; + GenTree* lastOp = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(numArgs)); bool buildUses = true; diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index e2fae6778b323..c0c90c4c19cb4 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -608,37 +608,6 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge assert(comp->IsTargetIntrinsic(node->AsIntrinsic()->gtIntrinsicName)); break; -#ifdef FEATURE_SIMD - case GT_SIMD: - { - GenTreeSIMD* simdNode = node->AsSIMD(); - unsigned simdSize = simdNode->GetSimdSize(); - var_types simdType = comp->getSIMDTypeForSize(simdSize); - - // Certain SIMD trees require rationalizing. - if (simdNode->AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInitArray) - { - // Rewrite this as an explicit load. - JITDUMP("Rewriting GT_SIMD array init as an explicit load:\n"); - unsigned int baseTypeSize = genTypeSize(simdNode->GetSimdBaseType()); - - GenTree* base = simdNode->Op(1); - GenTree* index = (simdNode->GetOperandCount() == 2) ? simdNode->Op(2) : nullptr; - GenTree* address = new (comp, GT_LEA) - GenTreeAddrMode(TYP_BYREF, base, index, baseTypeSize, OFFSETOF__CORINFO_Array__data); - GenTree* ind = comp->gtNewOperNode(GT_IND, simdType, address); - - BlockRange().InsertBefore(simdNode, address, ind); - use.ReplaceWith(ind); - BlockRange().Remove(simdNode); - - DISPTREERANGE(BlockRange(), use.Def()); - JITDUMP("\n"); - } - } - break; -#endif // FEATURE_SIMD - default: // Check that we don't have nodes not allowed in HIR here. assert((node->DebugOperKind() & DBK_NOTHIR) == 0); diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index b66db85f71ba9..8f18908134f51 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -906,267 +906,6 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH return simdBaseJitType; } -//-------------------------------------------------------------------------------------- -// getSIMDIntrinsicInfo: get SIMD intrinsic info given the method handle. -// -// Arguments: -// inOutTypeHnd - The handle of the type on which the method is invoked. This is an in-out param. -// methodHnd - The handle of the method we're interested in. -// sig - method signature info -// isNewObj - whether this call represents a newboj constructor call -// argCount - argument count - out pram -// simdBaseJitType - base JIT type of the intrinsic - out param -// sizeBytes - size of SIMD vector type on which the method is invoked - out param -// -// Return Value: -// SIMDIntrinsicInfo struct initialized corresponding to methodHnd. -// Sets SIMDIntrinsicInfo.id to SIMDIntrinsicInvalid if methodHnd doesn't correspond -// to any SIMD intrinsic. Also, sets the out params inOutTypeHnd, argCount, baseType and -// sizeBytes. -// -// Note that VectorMath class doesn't have a base type and first argument of the method -// determines the SIMD vector type on which intrinsic is invoked. In such a case inOutTypeHnd -// is modified by this routine. -// -// TODO-Throughput: The current implementation is based on method name string parsing. -// Although we now have type identification from the VM, the parsing of intrinsic names -// could be made more efficient. 
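The TODO above suggests replacing the linear strcmp scan with a binary search over a name-sorted table. A sketch of that direction (the table contents and ids here are hypothetical):

```cpp
#include <algorithm>
#include <cstring>
#include <cstdio>
#include <iterator>

struct IntrinsicEntry
{
    const char* methodName;
    int         id;
};

// Hypothetical table, pre-sorted by methodName so binary search is valid.
static const IntrinsicEntry kTable[] = {
    {"CopyTo", 3},
    {"Equals", 1},
    {"get_Item", 2},
    {"op_Addition", 0},
};

// Binary search replacement for the linear strcmp loop in the removed code.
static int LookupIntrinsicId(const char* methodName)
{
    auto it = std::lower_bound(std::begin(kTable), std::end(kTable), methodName,
                               [](const IntrinsicEntry& e, const char* name) {
                                   return std::strcmp(e.methodName, name) < 0;
                               });

    if ((it != std::end(kTable)) && (std::strcmp(it->methodName, methodName) == 0))
    {
        return it->id;
    }
    return -1; // the SIMDIntrinsicInvalid equivalent
}

int main()
{
    std::printf("%d %d\n", LookupIntrinsicId("get_Item"), LookupIntrinsicId("Missing")); // 2 -1
    return 0;
}
```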
-// -const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* inOutTypeHnd, - CORINFO_METHOD_HANDLE methodHnd, - CORINFO_SIG_INFO* sig, - bool isNewObj, - unsigned* argCount, - CorInfoType* simdBaseJitType, - unsigned* sizeBytes) -{ - assert(simdBaseJitType != nullptr); - assert(sizeBytes != nullptr); - - // get simdBaseJitType and size of the type - CORINFO_CLASS_HANDLE typeHnd = *inOutTypeHnd; - *simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(typeHnd, sizeBytes); - - if (typeHnd == m_simdHandleCache->SIMDVectorHandle) - { - // All of the supported intrinsics on this static class take a first argument that's a vector, - // which determines the simdBaseJitType. - // The exception is the IsHardwareAccelerated property, which is handled as a special case. - assert(*simdBaseJitType == CORINFO_TYPE_UNDEF); - assert(sig->numArgs != 0); - { - typeHnd = info.compCompHnd->getArgClass(sig, sig->args); - *inOutTypeHnd = typeHnd; - *simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(typeHnd, sizeBytes); - } - } - - if (*simdBaseJitType == CORINFO_TYPE_UNDEF) - { - JITDUMP("NOT a SIMD Intrinsic: unsupported baseType\n"); - return nullptr; - } - - var_types simdBaseType = JitType2PreciseVarType(*simdBaseJitType); - - // account for implicit "this" arg - *argCount = sig->numArgs; - if (sig->hasThis()) - { - *argCount += 1; - } - - // Get the Intrinsic Id by parsing method name. - // - // TODO-Throughput: replace sequential search by binary search by arranging entries - // sorted by method name. - SIMDIntrinsicID intrinsicId = SIMDIntrinsicInvalid; - const char* methodName = info.compCompHnd->getMethodNameFromMetadata(methodHnd, nullptr, nullptr, nullptr); - for (int i = SIMDIntrinsicNone + 1; i < SIMDIntrinsicInvalid; ++i) - { - if (strcmp(methodName, simdIntrinsicInfoArray[i].methodName) == 0) - { - // Found an entry for the method; further check whether it is one of - // the supported base types. - bool found = false; - for (int j = 0; j < SIMD_INTRINSIC_MAX_BASETYPE_COUNT; ++j) - { - // Convention: if there are fewer base types supported than MAX_BASETYPE_COUNT, - // the end of the list is marked by TYP_UNDEF. - if (simdIntrinsicInfoArray[i].supportedBaseTypes[j] == TYP_UNDEF) - { - break; - } - - if (simdIntrinsicInfoArray[i].supportedBaseTypes[j] == simdBaseType) - { - found = true; - break; - } - } - - if (!found) - { - continue; - } - - // Now, check the arguments. - unsigned int fixedArgCnt = simdIntrinsicInfoArray[i].argCount; - unsigned int expectedArgCnt = fixedArgCnt; - - // First handle SIMDIntrinsicInitN, where the arg count depends on the type. - // The listed arg types include the vector and the first two init values, which is the expected number - // for Vector2. For other cases, we'll check their types here. - if (*argCount > expectedArgCnt) - { - if (i == SIMDIntrinsicInitN) - { - if (*argCount == 3 && typeHnd == m_simdHandleCache->SIMDVector2Handle) - { - expectedArgCnt = 3; - } - else if (*argCount == 4 && typeHnd == m_simdHandleCache->SIMDVector3Handle) - { - expectedArgCnt = 4; - } - else if (*argCount == 5 && typeHnd == m_simdHandleCache->SIMDVector4Handle) - { - expectedArgCnt = 5; - } - } - else if (i == SIMDIntrinsicInitFixed) - { - if (*argCount == 4 && typeHnd == m_simdHandleCache->SIMDVector4Handle) - { - expectedArgCnt = 4; - } - } - } - if (*argCount != expectedArgCnt) - { - continue; - } - - // Validate the types of individual args passed are what is expected of. 
- // If any of the types don't match with what is expected, don't consider - // as an intrinsic. This will make an older JIT with SIMD capabilities - // resilient to breaking changes to SIMD managed API. - // - // Note that from IL type stack, args get popped in right to left order - // whereas args get listed in method signatures in left to right order. - - int stackIndex = (expectedArgCnt - 1); - - // Track the arguments from the signature - we currently only use this to distinguish - // integral and pointer types, both of which will by TYP_I_IMPL on the importer stack. - CORINFO_ARG_LIST_HANDLE argLst = sig->args; - - CORINFO_CLASS_HANDLE argClass; - for (unsigned int argIndex = 0; found == true && argIndex < expectedArgCnt; argIndex++) - { - bool isThisPtr = ((argIndex == 0) && sig->hasThis()); - - // In case of "newobj SIMDVector(T val)", thisPtr won't be present on type stack. - // We don't check anything in that case. - if (!isThisPtr || !isNewObj) - { - GenTree* arg = impStackTop(stackIndex).val; - var_types argType = arg->TypeGet(); - - var_types expectedArgType; - if (argIndex < fixedArgCnt) - { - // Convention: - // - intrinsicInfo.argType[i] == TYP_UNDEF - intrinsic doesn't have a valid arg at position i - // - intrinsicInfo.argType[i] == TYP_UNKNOWN - arg type should be same as simdBaseType - // Note that we pop the args off in reverse order. - expectedArgType = simdIntrinsicInfoArray[i].argType[argIndex]; - assert(expectedArgType != TYP_UNDEF); - if (expectedArgType == TYP_UNKNOWN) - { - // The type of the argument will be genActualType(*simdBaseType). - expectedArgType = genActualType(simdBaseType); - argType = genActualType(argType); - } - } - else - { - expectedArgType = simdBaseType; - } - - if (!isThisPtr && argType == TYP_I_IMPL) - { - // The reference implementation has a constructor that takes a pointer. - // We don't want to recognize that one. This requires us to look at the CorInfoType - // in order to distinguish a signature with a pointer argument from one with an - // integer argument of pointer size, both of which will be TYP_I_IMPL on the stack. - // TODO-Review: This seems quite fragile. We should consider beefing up the checking - // here. - CorInfoType corType = strip(info.compCompHnd->getArgType(sig, argLst, &argClass)); - if (corType == CORINFO_TYPE_PTR) - { - found = false; - } - } - - if (varTypeIsSIMD(argType)) - { - argType = TYP_STRUCT; - } - if (argType != expectedArgType) - { - found = false; - } - } - if (argIndex != 0 || !sig->hasThis()) - { - argLst = info.compCompHnd->getArgNext(argLst); - } - stackIndex--; - } - - // Cross check return type and static vs. instance is what we are expecting. - // If not, don't consider it as an intrinsic. - // Note that ret type of TYP_UNKNOWN means that it is not known apriori and must be same as simdBaseType - if (found) - { - var_types expectedRetType = simdIntrinsicInfoArray[i].retType; - if (expectedRetType == TYP_UNKNOWN) - { - // JIT maps uint/ulong type vars to TYP_INT/TYP_LONG. - expectedRetType = (simdBaseType == TYP_UINT || simdBaseType == TYP_ULONG) - ? 
genActualType(simdBaseType) - : simdBaseType; - } - - if (JITtype2varType(sig->retType) != expectedRetType || - sig->hasThis() != simdIntrinsicInfoArray[i].isInstMethod) - { - found = false; - } - } - - if (found) - { - intrinsicId = (SIMDIntrinsicID)i; - break; - } - } - } - - if (intrinsicId != SIMDIntrinsicInvalid) - { - JITDUMP("Method %s maps to SIMD intrinsic %s\n", methodName, simdIntrinsicNames[intrinsicId]); - return &simdIntrinsicInfoArray[intrinsicId]; - } - else - { - JITDUMP("Method %s is NOT a SIMD intrinsic\n", methodName); - } - - return nullptr; -} - // Pops and returns GenTree node from importer's type stack. // Normalizes TYP_STRUCT value in case of GT_CALL and GT_RET_EXPR. // @@ -1238,40 +977,6 @@ GenTree* Compiler::impSIMDPopStack(var_types type, bool expectAddr, CORINFO_CLAS return tree; } -//------------------------------------------------------------------------ -// getOp1ForConstructor: Get the op1 for a constructor call. -// -// Arguments: -// opcode - the opcode being handled (needed to identify the CEE_NEWOBJ case) -// newobjThis - For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object. -// clsHnd - The handle of the class of the method. -// -// Return Value: -// The tree node representing the object to be initialized with the constructor. -// -// Notes: -// This method handles the differences between the CEE_NEWOBJ and constructor cases. -// -GenTree* Compiler::getOp1ForConstructor(OPCODE opcode, GenTree* newobjThis, CORINFO_CLASS_HANDLE clsHnd) -{ - GenTree* op1; - if (opcode == CEE_NEWOBJ) - { - op1 = newobjThis; - assert(newobjThis->OperIs(GT_LCL_VAR_ADDR)); - - // push newobj result on type stack - unsigned lclNum = op1->AsLclVarCommon()->GetLclNum(); - impPushOnStack(gtNewLclvNode(lclNum, lvaGetRealType(lclNum)), verMakeTypeInfo(clsHnd).NormaliseForStack()); - } - else - { - op1 = impSIMDPopStack(TYP_BYREF); - } - assert(op1->TypeGet() == TYP_BYREF); - return op1; -} - //------------------------------------------------------------------- // Set the flag that indicates that the lclVar referenced by this tree // is used in a SIMD intrinsic. @@ -1460,13 +1165,13 @@ bool Compiler::areArgumentsContiguous(GenTree* op1, GenTree* op2) } //-------------------------------------------------------------------------------------------------------- -// createAddressNodeForSIMDInit: Generate the address node if we want to initialize vector2, vector3 or vector4 +// CreateAddressNodeForSimdHWIntrinsicCreate: Generate the address node if we want to initialize a simd type // from first argument's address. // // Arguments: -// tree - GenTree*. This the tree node which is used to get the address for indir. -// simdsize - unsigned. This the simd vector size. -// arrayElementsCount - unsigned. This is used for generating the boundary check for array. +// tree - The tree node which is used to get the address for indir. +// simdBaseType - The type of the elements in the SIMD node +// simdsize - The simd vector size. // // Return value: // return the address node. @@ -1475,7 +1180,7 @@ bool Compiler::areArgumentsContiguous(GenTree* op1, GenTree* op2) // Currently just supports GT_FIELD and GT_IND(GT_INDEX_ADDR), because we can only verify those nodes // are located contiguously or not. In future we should support more cases. 
// -GenTree* Compiler::createAddressNodeForSIMDInit(GenTree* tree, unsigned simdSize) +GenTree* Compiler::CreateAddressNodeForSimdHWIntrinsicCreate(GenTree* tree, var_types simdBaseType, unsigned simdSize) { GenTree* byrefNode = nullptr; unsigned offset = 0; @@ -1524,7 +1229,7 @@ GenTree* Compiler::createAddressNodeForSIMDInit(GenTree* tree, unsigned simdSize // The length for boundary check should be the maximum index number which should be // (first argument's index number) + (how many array arguments we have) - 1 // = indexVal + arrayElementsCount - 1 - unsigned arrayElementsCount = simdSize / genTypeSize(baseType); + unsigned arrayElementsCount = simdSize / genTypeSize(simdBaseType); checkIndexExpr = gtNewIconNode(indexVal + arrayElementsCount - 1); GenTreeArrLen* arrLen = gtNewArrLen(TYP_INT, arrayRef, (int)OFFSETOF__CORINFO_Array__length, compCurBB); GenTreeBoundsChk* arrBndsChk = @@ -1614,398 +1319,4 @@ void Compiler::impMarkContiguousSIMDFieldAssignments(Statement* stmt) fgPreviousCandidateSIMDFieldAsgStmt = nullptr; } } - -//------------------------------------------------------------------------ -// impSIMDIntrinsic: Check method to see if it is a SIMD method -// -// Arguments: -// opcode - the opcode being handled (needed to identify the CEE_NEWOBJ case) -// newobjThis - For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object. -// clsHnd - The handle of the class of the method. -// method - The handle of the method. -// sig - The call signature for the method. -// memberRef - The memberRef token for the method reference. -// -// Return Value: -// If clsHnd is a known SIMD type, and 'method' is one of the methods that are -// implemented as an intrinsic in the JIT, then return the tree that implements -// it. -// -GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, - GenTree* newobjThis, - CORINFO_CLASS_HANDLE clsHnd, - CORINFO_METHOD_HANDLE methodHnd, - CORINFO_SIG_INFO* sig, - unsigned methodFlags, - int memberRef) -{ - assert((methodFlags & CORINFO_FLG_INTRINSIC) != 0); - - // Exit early if we are not in one of the SIMD types. - if (!isSIMDClass(clsHnd)) - { - return nullptr; - } - - // Get base type and intrinsic Id - CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - unsigned size = 0; - unsigned argCount = 0; - const SIMDIntrinsicInfo* intrinsicInfo = - getSIMDIntrinsicInfo(&clsHnd, methodHnd, sig, (opcode == CEE_NEWOBJ), &argCount, &simdBaseJitType, &size); - - // Exit early if the intrinsic is invalid or unrecognized - if ((intrinsicInfo == nullptr) || (intrinsicInfo->id == SIMDIntrinsicInvalid)) - { - return nullptr; - } - - if (!IsBaselineSimdIsaSupported()) - { - // The user disabled support for the baseline ISA so - // don't emit any SIMD intrinsics as they all require - // this at a minimum. - - return nullptr; - } - - SIMDIntrinsicID simdIntrinsicID = intrinsicInfo->id; - var_types simdBaseType; - var_types simdType; - - assert(simdBaseJitType != CORINFO_TYPE_UNDEF); - { - simdBaseType = JitType2PreciseVarType(simdBaseJitType); - simdType = getSIMDTypeForSize(size); - } - - bool instMethod = intrinsicInfo->isInstMethod; - var_types callType = JITtype2varType(sig->retType); - if (callType == TYP_STRUCT) - { - // Note that here we are assuming that, if the call returns a struct, that it is the same size as the - // struct on which the method is declared. This is currently true for all methods on Vector types, - // but if this ever changes, we will need to determine the callType from the signature. 
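For orientation, the pattern this address-node helper targets looks like the following from the managed side. This is a hedged sketch (the class and method names here are mine, for illustration only): when the constructor arguments are adjacent elements of the same array, the importer can replace the per-element scalar loads with one wide indirection off the first element's address, guarded by a single bounds check on the last index.

```csharp
using System;
using System.Numerics;

class ContiguousCreateExample
{
    // Adjacent array elements in source order: the importer can verify these
    // operands are contiguous and replace the three scalar loads with one
    // 12-byte indirection off &data[i], guarded by a single bounds check on
    // the last element accessed (index i + 2).
    static Vector3 LoadVector3(float[] data, int i) =>
        new Vector3(data[i], data[i + 1], data[i + 2]);

    static void Main()
    {
        float[] data = { 1f, 2f, 3f, 4f };
        Console.WriteLine(LoadVector3(data, 1)); // <2, 3, 4>
    }
}
```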
- assert(info.compCompHnd->getClassSize(sig->retTypeClass) == genTypeSize(simdType)); - callType = simdType; - } - - GenTree* simdTree = nullptr; - GenTree* op1 = nullptr; - GenTree* op2 = nullptr; - GenTree* op3 = nullptr; - GenTree* retVal = nullptr; - GenTree* copyBlkDst = nullptr; - bool doCopyBlk = false; - - switch (simdIntrinsicID) - { - case SIMDIntrinsicInitN: - { - // SIMDIntrinsicInitN - // op2 - list of initializer values stitched into a list - // op1 - byref of vector - IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), argCount - 1); - bool initFromFirstArgIndir = false; - - { - assert(simdIntrinsicID == SIMDIntrinsicInitN); - assert(simdBaseType == TYP_FLOAT); - - unsigned initCount = argCount - 1; - unsigned elementCount = getSIMDVectorLength(size, simdBaseType); - noway_assert(initCount == elementCount); - - // Build an array with the N values. - // We must maintain left-to-right order of the args, but we will pop - // them off in reverse order (the Nth arg was pushed onto the stack last). - - GenTree* prevArg = nullptr; - bool areArgsContiguous = true; - for (unsigned i = 0; i < initCount; i++) - { - GenTree* arg = impSIMDPopStack(simdBaseType); - - if (areArgsContiguous) - { - GenTree* curArg = arg; - - if (prevArg != nullptr) - { - // Recall that we are popping the args off the stack in reverse order. - areArgsContiguous = areArgumentsContiguous(curArg, prevArg); - } - prevArg = curArg; - } - - assert(genActualType(arg) == genActualType(simdBaseType)); - nodeBuilder.AddOperand(initCount - i - 1, arg); - } - - if (areArgsContiguous && simdBaseType == TYP_FLOAT) - { - // Since Vector2, Vector3 and Vector4's arguments type are only float, - // we initialize the vector from first argument address, only when - // the simdBaseType is TYP_FLOAT and the arguments are located contiguously in memory - initFromFirstArgIndir = true; - GenTree* op2Address = createAddressNodeForSIMDInit(nodeBuilder.GetOperand(0), size); - var_types simdType = getSIMDTypeForSize(size); - op2 = gtNewOperNode(GT_IND, simdType, op2Address); - } - } - - op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); - - assert(op1->TypeGet() == TYP_BYREF); - - { - assert(!varTypeIsSmallInt(simdBaseType)); - - if (initFromFirstArgIndir) - { - simdTree = op2; - if (op1->OperIs(GT_LCL_VAR_ADDR)) - { - // label the dst struct's lclvar is used for SIMD intrinsic, - // so that this dst struct won't be promoted. - setLclRelatedToSIMDIntrinsic(op1); - } - } - else - { - simdTree = new (this, GT_SIMD) - GenTreeSIMD(simdType, std::move(nodeBuilder), simdIntrinsicID, simdBaseJitType, size); - } - } - - copyBlkDst = op1; - doCopyBlk = true; - } - break; - - case SIMDIntrinsicInitArray: - case SIMDIntrinsicInitArrayX: - case SIMDIntrinsicCopyToArray: - case SIMDIntrinsicCopyToArrayX: - { - // op3 - index into array in case of SIMDIntrinsicCopyToArrayX and SIMDIntrinsicInitArrayX - // op2 - array itself - // op1 - byref to vector struct - - unsigned int vectorLength = getSIMDVectorLength(size, simdBaseType); - // (This constructor takes only the zero-based arrays.) - // We will add one or two bounds checks: - // 1. If we have an index, we must do a check on that first. - // We can't combine it with the index + vectorLength check because - // a. It might be negative, and b. It may need to raise a different exception - // (captured as SCK_ARG_RNG_EXCPN for CopyTo and Init). - // 2. We need to generate a check (SCK_ARG_EXCPN for CopyTo and Init) - // for the last array element we will access. 
- // We'll either check against (vectorLength - 1) or (index + vectorLength - 1). - - GenTree* checkIndexExpr = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength - 1); - - // Get the index into the array. If it has been provided, it will be on the - // top of the stack. Otherwise, it is null. - if (argCount == 3) - { - op3 = impSIMDPopStack(TYP_INT); - if (op3->IsIntegralConst(0)) - { - op3 = nullptr; - } - } - else - { - // TODO-CQ: Here, or elsewhere, check for the pattern where op2 is a newly constructed array, and - // change this to the InitN form. - // op3 = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0); - op3 = nullptr; - } - - // Clone the array for use in the bounds check. - op2 = impSIMDPopStack(TYP_REF); - assert(op2->TypeGet() == TYP_REF); - GenTree* arrayRefForArgChk = op2; - GenTree* argRngChk = nullptr; - if ((arrayRefForArgChk->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op2 = fgInsertCommaFormTemp(&arrayRefForArgChk); - } - else - { - op2 = gtCloneExpr(arrayRefForArgChk); - } - assert(op2 != nullptr); - - if (op3 != nullptr) - { - // We need to use the original expression on this, which is the first check. - GenTree* arrayRefForArgRngChk = arrayRefForArgChk; - // Then we clone the clone we just made for the next check. - arrayRefForArgChk = gtCloneExpr(op2); - // We know we MUST have had a cloneable expression. - assert(arrayRefForArgChk != nullptr); - GenTree* index = op3; - if ((index->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op3 = fgInsertCommaFormTemp(&index); - } - else - { - op3 = gtCloneExpr(index); - } - - GenTreeArrLen* arrLen = - gtNewArrLen(TYP_INT, arrayRefForArgRngChk, (int)OFFSETOF__CORINFO_Array__length, compCurBB); - argRngChk = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_ARG_RNG_EXCPN); - // Now, clone op3 to create another node for the argChk - GenTree* index2 = gtCloneExpr(op3); - assert(index != nullptr); - checkIndexExpr = gtNewOperNode(GT_ADD, TYP_INT, index2, checkIndexExpr); - } - - // Insert a bounds check for index + offset - 1. - // This must be a "normal" array. - SpecialCodeKind op2CheckKind; - if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX) - { - op2CheckKind = SCK_ARG_RNG_EXCPN; - } - else - { - op2CheckKind = SCK_ARG_EXCPN; - } - GenTreeArrLen* arrLen = - gtNewArrLen(TYP_INT, arrayRefForArgChk, (int)OFFSETOF__CORINFO_Array__length, compCurBB); - GenTreeBoundsChk* argChk = - new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(checkIndexExpr, arrLen, op2CheckKind); - - // Create a GT_COMMA tree for the bounds check(s). - op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argChk, op2); - if (argRngChk != nullptr) - { - op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argRngChk, op2); - } - - if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX) - { - op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); - simdTree = (op3 != nullptr) - ? 
gtNewSIMDNode(simdType, op2, op3, SIMDIntrinsicInitArray, simdBaseJitType, size) - : gtNewSIMDNode(simdType, op2, SIMDIntrinsicInitArray, simdBaseJitType, size); - copyBlkDst = op1; - doCopyBlk = true; - } - else - { - assert(simdIntrinsicID == SIMDIntrinsicCopyToArray || simdIntrinsicID == SIMDIntrinsicCopyToArrayX); - op1 = impSIMDPopStack(simdType, instMethod); - assert(op1->TypeGet() == simdType); - - // copy vector (op1) to array (op2) starting at index (op3) - simdTree = op1; - copyBlkDst = op2; - if (op3 != nullptr) - { -#ifdef TARGET_64BIT - // Upcast the index: it is safe to use a zero-extending cast since we've bounds checked it above. - op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL); -#endif // !TARGET_64BIT - GenTree* elemSizeNode = gtNewIconNode(genTypeSize(simdBaseType), TYP_I_IMPL); - GenTree* indexOffs = gtNewOperNode(GT_MUL, TYP_I_IMPL, op3, elemSizeNode); - copyBlkDst = gtNewOperNode(GT_ADD, TYP_BYREF, copyBlkDst, indexOffs); - } - - copyBlkDst = gtNewOperNode(GT_ADD, TYP_BYREF, copyBlkDst, - gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL)); - doCopyBlk = true; - } - } - break; - - case SIMDIntrinsicInitFixed: - { - // We are initializing a fixed-length vector VLarge with a smaller fixed-length vector VSmall, plus 1 or 2 - // additional floats. - // op4 (optional) - float value for VLarge.W, if VLarge is Vector4, and VSmall is Vector2 - // op3 - float value for VLarge.Z or VLarge.W - // op2 - VSmall - // op1 - byref of VLarge - assert(simdBaseType == TYP_FLOAT); - - GenTree* op4 = nullptr; - if (argCount == 4) - { - op4 = impSIMDPopStack(TYP_FLOAT); - assert(op4->TypeGet() == TYP_FLOAT); - } - op3 = impSIMDPopStack(TYP_FLOAT); - assert(op3->TypeGet() == TYP_FLOAT); - // The input vector will either be TYP_SIMD8 or TYP_SIMD12. - var_types smallSIMDType = TYP_SIMD8; - if ((op4 == nullptr) && (simdType == TYP_SIMD16)) - { - smallSIMDType = TYP_SIMD12; - } - op2 = impSIMDPopStack(smallSIMDType); - op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); - - // We are going to redefine the operands so that: - // - op3 is the value that's going into the Z position, or null if it's a Vector4 constructor with a single - // operand, and - // - op4 is the W position value, or null if this is a Vector3 constructor. - if (size == 16 && argCount == 3) - { - op4 = op3; - op3 = nullptr; - } - - simdTree = op2; - if (op3 != nullptr) - { - simdTree = gtNewSimdWithElementNode(simdType, simdTree, gtNewIconNode(2, TYP_INT), op3, simdBaseJitType, - size, /* isSimdAsHWIntrinsic */ true); - } - if (op4 != nullptr) - { - simdTree = gtNewSimdWithElementNode(simdType, simdTree, gtNewIconNode(3, TYP_INT), op4, simdBaseJitType, - size, /* isSimdAsHWIntrinsic */ true); - } - - copyBlkDst = op1; - doCopyBlk = true; - } - break; - - default: - assert(!"Unimplemented SIMD Intrinsic"); - return nullptr; - } - -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) - // XArch/Arm64: also indicate that we use floating point registers. - // The need for setting this here is that a method may not have SIMD - // type lclvars, but might be exercising SIMD intrinsics on fields of - // SIMD type. - // - // e.g. public Vector ComplexVecFloat::sqabs() { return this.r * this.r + this.i * this.i; } - compFloatingPointUsed = true; -#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) - - // At this point, we have a tree that we are going to store into a destination. 
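The SIMDIntrinsicInitFixed shapes being retired above correspond to the following managed constructors (illustrative only; the exact codegen differs before and after this change, but the operand shapes are the same):

```csharp
using System;
using System.Numerics;

class InitFixedExample
{
    static void Main()
    {
        var v2 = new Vector2(1f, 2f);

        // VLarge = Vector3, VSmall = Vector2, one extra float for Z.
        var v3 = new Vector3(v2, 3f);

        // VLarge = Vector4, VSmall = Vector2, two extra floats for Z and W.
        var v4a = new Vector4(v2, 3f, 4f);

        // VLarge = Vector4, VSmall = Vector3, one extra float for W.
        var v4b = new Vector4(v3, 4f);

        Console.WriteLine($"{v3} {v4a} {v4b}");
    }
}
```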
- // TODO-1stClassStructs: This should be a simple store or assignment, and should not require - // GTF_ALL_EFFECT for the dest. This is currently emulating the previous behavior of - // block ops. - if (doCopyBlk) - { - GenTree* dest = new (this, GT_BLK) - GenTreeBlk(GT_BLK, simdType, copyBlkDst, typGetBlkLayout(getSIMDTypeSizeInBytes(clsHnd))); - dest->gtFlags |= GTF_GLOB_REF; - retVal = gtNewBlkOpNode(dest, simdTree); - } - - return retVal; -} - #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index b298eb89c2a78..347ae5b6d083c 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -331,6 +331,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, { if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)) { + assert(newobjThis == nullptr); impSpillSideEffect(true, verCurrentState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); } @@ -396,6 +397,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; GenTree* op2 = nullptr; GenTree* op3 = nullptr; + GenTree* op4 = nullptr; + GenTree* op5 = nullptr; unsigned numArgs = sig->numArgs; bool isInstanceMethod = false; @@ -1180,11 +1183,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 2: { - if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)) + if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic) && (newobjThis == nullptr)) { impSpillSideEffect(true, verCurrentState.esStackDepth - - ((newobjThis == nullptr) ? 2 : 1)DEBUGARG( - "Spilling op1 side effects for SimdAsHWIntrinsic")); + 2 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); } CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? argList : info.compCompHnd->getArgNext(argList); @@ -1741,9 +1743,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 3: { - assert(newobjThis == nullptr); - - if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)) + if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic) && (newobjThis == nullptr)) { impSpillSideEffect(true, verCurrentState.esStackDepth - 3 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); @@ -1751,6 +1751,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic)) { + assert(newobjThis == nullptr); impSpillSideEffect(true, verCurrentState.esStackDepth - 2 DEBUGARG("Spilling op2 side effects for SimdAsHWIntrinsic")); } @@ -1764,9 +1765,21 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); - argType = isInstanceMethod ? simdType - : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod, newobjThis); + bool implicitConstructor = isInstanceMethod && (newobjThis == nullptr) && (retType == TYP_VOID); + + if (implicitConstructor) + { + op1 = getArgForHWIntrinsic(TYP_BYREF, argClass, isInstanceMethod, newobjThis); + } + else + { + argType = isInstanceMethod + ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + + op1 = getArgForHWIntrinsic(argType, (newobjThis != nullptr) ? 
clsHnd : argClass, isInstanceMethod,
+                                               newobjThis);
+                    }

                     assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic));
@@ -1842,6 +1855,213 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

+                case NI_Vector2_Create:
+                {
+                    assert(retType == TYP_VOID);
+                    assert(simdBaseType == TYP_FLOAT);
+                    assert(simdSize == 8);
+
+                    if (op2->IsCnsFltOrDbl() && op3->IsCnsFltOrDbl())
+                    {
+                        GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD8);
+
+                        float cnsVal = 0;
+
+                        vecCon->gtSimd8Val.f32[0] = static_cast<float>(op2->AsDblCon()->DconValue());
+                        vecCon->gtSimd8Val.f32[1] = static_cast<float>(op3->AsDblCon()->DconValue());
+
+                        copyBlkSrc = vecCon;
+                    }
+                    else if (areArgumentsContiguous(op2, op3))
+                    {
+                        GenTree* op2Address = CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 8);
+                        copyBlkSrc          = gtNewOperNode(GT_IND, TYP_SIMD8, op2Address);
+                    }
+                    else
+                    {
+#if defined(TARGET_XARCH)
+                        IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), 4);
+
+                        nodeBuilder.AddOperand(0, op2);
+                        nodeBuilder.AddOperand(1, op3);
+                        nodeBuilder.AddOperand(2, gtNewZeroConNode(TYP_FLOAT));
+                        nodeBuilder.AddOperand(3, gtNewZeroConNode(TYP_FLOAT));
+
+                        copyBlkSrc = gtNewSimdHWIntrinsicNode(TYP_SIMD8, std::move(nodeBuilder), NI_Vector128_Create,
+                                                              simdBaseJitType, 16, /* isSimdAsHWIntrinsic */ true);
+#elif defined(TARGET_ARM64)
+                        copyBlkSrc = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op2, op3, NI_Vector64_Create, simdBaseJitType,
+                                                              8, /* isSimdAsHWIntrinsic */ true);
+#else
+#error Unsupported platform
+#endif // !TARGET_XARCH && !TARGET_ARM64
+                    }
+
+                    copyBlkDst = op1;
+                    break;
+                }
+
+                case NI_Vector3_CreateFromVector2:
+                case NI_Vector4_CreateFromVector3:
+                {
+                    assert(retType == TYP_VOID);
+                    assert(simdBaseType == TYP_FLOAT);
+                    assert((simdSize == 12) || (simdSize == 16));
+
+                    // TODO-CQ: We should be able to check for contiguous args here after
+                    // the relevant methods are updated to support more than just float
+
+                    if (op2->IsCnsVec() && op3->IsCnsFltOrDbl())
+                    {
+                        GenTreeVecCon* vecCon = op2->AsVecCon();
+                        vecCon->gtType        = simdType;
+
+                        if (simdSize == 12)
+                        {
+                            vecCon->gtSimd12Val.f32[2] = static_cast<float>(op3->AsDblCon()->DconValue());
+                        }
+                        else
+                        {
+                            vecCon->gtSimd16Val.f32[3] = static_cast<float>(op3->AsDblCon()->DconValue());
+                        }
+
+                        copyBlkSrc = vecCon;
+                    }
+                    else
+                    {
+                        GenTree* idx = gtNewIconNode((simdSize == 12) ? 2 : 3, TYP_INT);
+                        copyBlkSrc   = gtNewSimdWithElementNode(simdType, op2, op3, idx, simdBaseJitType, simdSize,
+                                                              /* isSimdAsHWIntrinsic */ true);
+                    }
+
+                    copyBlkDst = op1;
+                    break;
+                }
+
+                default:
+                {
+                    // Some platforms warn about unhandled switch cases
+                    // We handle it more generally via the assert and nullptr return below.
+                    break;
+                }
+            }
+            break;
+        }
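The new NI_Vector2_Create handling above picks one of three forms: a vector constant when every argument is a constant, a single 8-byte indirection when the arguments are contiguous in memory, and a Vector64/Vector128_Create node otherwise. A hedged C# sketch of the three shapes (the method names are mine; which form the JIT picks is an optimization detail, not an API guarantee):

```csharp
using System;
using System.Numerics;

class CreateLoweringExample
{
    // All-constant arguments: folds to a single vector constant (GenTreeVecCon).
    static Vector2 Constant() => new Vector2(1f, 2f);

    // Contiguous arguments: becomes one 8-byte load from the first element.
    static Vector2 Contiguous(float[] a, int i) => new Vector2(a[i], a[i + 1]);

    // General case: a Vector64_Create (arm64) or Vector128_Create (xarch) node.
    static Vector2 General(float x, float y) => new Vector2(x, y);

    static void Main()
    {
        Console.WriteLine(Constant());
        Console.WriteLine(Contiguous(new[] { 1f, 2f, 3f }, 1));
        Console.WriteLine(General(5f, 6f));
    }
}
```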
+
+        case 4:
+        {
+            assert(isInstanceMethod);
+            assert(SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic));
+
+            if (newobjThis == nullptr)
+            {
+                impSpillSideEffect(true, verCurrentState.esStackDepth -
+                                             4 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic"));
+            }
+
+            assert(!SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic));
+
+            CORINFO_ARG_LIST_HANDLE arg2 = argList;
+            CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
+            CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass)));
+            op4     = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
+            op3     = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+            op2     = getArgForHWIntrinsic(argType, argClass);
+
+            if ((newobjThis == nullptr) && (retType == TYP_VOID))
+            {
+                op1 = getArgForHWIntrinsic(TYP_BYREF, argClass, isInstanceMethod, newobjThis);
+            }
+            else
+            {
+                op1 = getArgForHWIntrinsic(simdType, (newobjThis != nullptr) ? clsHnd : argClass, isInstanceMethod,
+                                           newobjThis);
+            }
+
+            assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic));
+
+            switch (intrinsic)
+            {
+                case NI_Vector3_Create:
+                {
+                    assert(retType == TYP_VOID);
+                    assert(simdBaseType == TYP_FLOAT);
+                    assert(simdSize == 12);
+
+                    if (op2->IsCnsFltOrDbl() && op3->IsCnsFltOrDbl() && op4->IsCnsFltOrDbl())
+                    {
+                        GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD12);
+
+                        float cnsVal = 0;
+
+                        vecCon->gtSimd12Val.f32[0] = static_cast<float>(op2->AsDblCon()->DconValue());
+                        vecCon->gtSimd12Val.f32[1] = static_cast<float>(op3->AsDblCon()->DconValue());
+                        vecCon->gtSimd12Val.f32[2] = static_cast<float>(op4->AsDblCon()->DconValue());
+
+                        copyBlkSrc = vecCon;
+                    }
+                    else if (areArgumentsContiguous(op2, op3) && areArgumentsContiguous(op3, op4))
+                    {
+                        GenTree* op2Address = CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 12);
+                        copyBlkSrc          = gtNewOperNode(GT_IND, TYP_SIMD12, op2Address);
+                    }
+                    else
+                    {
+                        IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), 4);
+
+                        nodeBuilder.AddOperand(0, op2);
+                        nodeBuilder.AddOperand(1, op3);
+                        nodeBuilder.AddOperand(2, op4);
+                        nodeBuilder.AddOperand(3, gtNewZeroConNode(TYP_FLOAT));
+
+                        copyBlkSrc = gtNewSimdHWIntrinsicNode(TYP_SIMD12, std::move(nodeBuilder), NI_Vector128_Create,
+                                                              simdBaseJitType, 16, /* isSimdAsHWIntrinsic */ true);
+                    }
+
+                    copyBlkDst = op1;
+                    break;
+                }
+
+                case NI_Vector4_CreateFromVector2:
+                {
+                    assert(retType == TYP_VOID);
+                    assert(simdBaseType == TYP_FLOAT);
+                    assert(simdSize == 16);
+
+                    // TODO-CQ: We should be able to check for contiguous args here after
+                    // the relevant methods are updated to support more than just float
+
+                    if (op2->IsCnsVec() && op3->IsCnsFltOrDbl() && op4->IsCnsFltOrDbl())
+                    {
+                        GenTreeVecCon* vecCon = op2->AsVecCon();
+                        vecCon->gtType        = simdType;
+
+                        vecCon->gtSimd16Val.f32[2] = static_cast<float>(op3->AsDblCon()->DconValue());
+                        vecCon->gtSimd16Val.f32[3] = static_cast<float>(op4->AsDblCon()->DconValue());
+
+                        copyBlkSrc = vecCon;
+                    }
+                    else
+                    {
+                        GenTree* idx = gtNewIconNode(2, TYP_INT);
+                        op2          = gtNewSimdWithElementNode(simdType, op2, op3, idx, simdBaseJitType, simdSize,
+                                                       /* isSimdAsHWIntrinsic */ true);
+
+                        idx        = gtNewIconNode(3, TYP_INT);
+                        copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, op4, idx, simdBaseJitType, simdSize,
+                                                              /* isSimdAsHWIntrinsic */ true);
+                    }
+
+                    copyBlkDst = op1;
+                    break;
+                }
+
+                default:
+                {
+                    // Some platforms warn about unhandled switch cases
+                    // We handle it more generally via the assert and nullptr return below.
@@ -1849,6 +2069,103 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
+                    break;
+                }
+            }
+            break;
+        }
+
+        case 5:
+        {
+            assert(isInstanceMethod);
+            assert(SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic));
+
+            if (newobjThis == nullptr)
+            {
+                impSpillSideEffect(true, verCurrentState.esStackDepth -
+                                             5 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic"));
+            }
+
+            assert(!SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic));
+
+            CORINFO_ARG_LIST_HANDLE arg2 = argList;
+            CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
+            CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3);
+            CORINFO_ARG_LIST_HANDLE arg5 = info.compCompHnd->getArgNext(arg4);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg5, &argClass)));
+            op5     = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass)));
+            op4     = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
+            op3     = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+            op2     = getArgForHWIntrinsic(argType, argClass);
+
+            if ((newobjThis == nullptr) && (retType == TYP_VOID))
+            {
+                op1 = getArgForHWIntrinsic(TYP_BYREF, argClass, isInstanceMethod, newobjThis);
+            }
+            else
+            {
+                op1 = getArgForHWIntrinsic(simdType, (newobjThis != nullptr) ? clsHnd : argClass, isInstanceMethod,
+                                           newobjThis);
+            }
+
+            assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic));
+
+            switch (intrinsic)
+            {
+                case NI_Vector4_Create:
+                {
+                    assert(retType == TYP_VOID);
+                    assert(simdBaseType == TYP_FLOAT);
+                    assert(simdSize == 16);
+
+                    if (op2->IsCnsFltOrDbl() && op3->IsCnsFltOrDbl() && op4->IsCnsFltOrDbl() && op5->IsCnsFltOrDbl())
+                    {
+                        GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16);
+
+                        float cnsVal = 0;
+
+                        vecCon->gtSimd16Val.f32[0] = static_cast<float>(op2->AsDblCon()->DconValue());
+                        vecCon->gtSimd16Val.f32[1] = static_cast<float>(op3->AsDblCon()->DconValue());
+                        vecCon->gtSimd16Val.f32[2] = static_cast<float>(op4->AsDblCon()->DconValue());
+                        vecCon->gtSimd16Val.f32[3] = static_cast<float>(op5->AsDblCon()->DconValue());
+
+                        copyBlkSrc = vecCon;
+                    }
+                    else if (areArgumentsContiguous(op2, op3) && areArgumentsContiguous(op3, op4) &&
+                             areArgumentsContiguous(op4, op5))
+                    {
+                        GenTree* op2Address = CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 16);
+                        copyBlkSrc          = gtNewOperNode(GT_IND, TYP_SIMD16, op2Address);
+                    }
+                    else
+                    {
+                        IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), 4);
+
+                        nodeBuilder.AddOperand(0, op2);
+                        nodeBuilder.AddOperand(1, op3);
+                        nodeBuilder.AddOperand(2, op4);
+                        nodeBuilder.AddOperand(3, op5);
+
+                        copyBlkSrc = gtNewSimdHWIntrinsicNode(TYP_SIMD16, std::move(nodeBuilder), NI_Vector128_Create,
+                                                              simdBaseJitType, 16, /* isSimdAsHWIntrinsic */ true);
+                    }
+
+                    copyBlkDst = op1;
+                    break;
+                }
+
+                default:
+                {
+                    // Some platforms warn about unhandled switch cases
+                    // We handle it more generally via the assert and nullptr return below.
+ break; + } + } + break; } } diff --git a/src/coreclr/jit/simdashwintrinsiclistarm64.h b/src/coreclr/jit/simdashwintrinsiclistarm64.h index 31f56a584a83a..405923bb2e3ed 100644 --- a/src/coreclr/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/jit/simdashwintrinsiclistarm64.h @@ -38,6 +38,7 @@ // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector2, Create, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector2, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector2, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -58,7 +59,9 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRoot, // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector3, Create, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector3, CreateFromVector2, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateFromVector2, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero , NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -78,7 +81,10 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector4, Create, ".ctor", 5, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector4, CreateFromVector2, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateFromVector2, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector4, CreateFromVector3, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateFromVector3, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/jit/simdashwintrinsiclistxarch.h b/src/coreclr/jit/simdashwintrinsiclistxarch.h index a8a09bc0cfa98..3fd2f87a1a539 100644 --- 
a/src/coreclr/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/jit/simdashwintrinsiclistxarch.h @@ -38,6 +38,7 @@ // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector2, Create, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector2, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector2, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -58,7 +59,9 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRoot, // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector3, Create, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector3, CreateFromVector2, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateFromVector2, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, 
{NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -78,7 +81,10 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector4, Create, ".ctor", 5, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector4, CreateFromVector2, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateFromVector2, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector4, CreateFromVector3, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateFromVector3, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index ced687fcff017..a05e281fa54ef 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -48,266 +48,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define ROUNDPS_TOWARD_POSITIVE_INFINITY_IMM 0b1010 #define ROUNDPS_TOWARD_ZERO_IMM 0b1011 -// 
getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic -// -// Arguments: -// intrinsicId - SIMD intrinsic Id -// baseType - Base type of the SIMD vector -// ival - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode -// -// -// Return Value: -// Instruction (op) to be used, and ival is set if instruction requires an immediate operand. -// -instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) -{ - // Minimal required instruction set is SSE2. - assert(compiler->getSIMDSupportLevel() >= SIMD_SSE2_Supported); - - instruction result = INS_invalid; - switch (intrinsicId) - { - case SIMDIntrinsicShiftLeftInternal: - switch (baseType) - { - case TYP_SIMD16: - // For SSE2, entire vector is shifted, for AVX2, 16-byte chunks are shifted. - result = INS_pslldq; - break; - case TYP_UINT: - case TYP_INT: - result = INS_pslld; - break; - case TYP_SHORT: - case TYP_USHORT: - result = INS_psllw; - break; - default: - assert(!"Invalid baseType for SIMDIntrinsicShiftLeftInternal"); - result = INS_invalid; - break; - } - break; - - case SIMDIntrinsicShiftRightInternal: - switch (baseType) - { - case TYP_SIMD16: - // For SSE2, entire vector is shifted, for AVX2, 16-byte chunks are shifted. - result = INS_psrldq; - break; - case TYP_UINT: - case TYP_INT: - result = INS_psrld; - break; - case TYP_SHORT: - case TYP_USHORT: - result = INS_psrlw; - break; - default: - assert(!"Invalid baseType for SIMDIntrinsicShiftRightInternal"); - result = INS_invalid; - break; - } - break; - - case SIMDIntrinsicUpperSave: - result = INS_vextractf128; - break; - - case SIMDIntrinsicUpperRestore: - result = INS_insertps; - break; - - default: - assert(!"Unsupported SIMD intrinsic"); - unreached(); - } - - noway_assert(result != INS_invalid); - return result; -} - -// genSIMDScalarMove: Generate code to move a value of type "type" from src mm reg -// to target mm reg, zeroing out the upper bits if and only if specified. -// -// Arguments: -// targetType the target type -// baseType the base type of value to be moved -// targetReg the target reg -// srcReg the src reg -// moveType action to be performed on target upper bits -// -// Return Value: -// None -// -// Notes: -// This is currently only supported for floating point types. -// -void CodeGen::genSIMDScalarMove( - var_types targetType, var_types baseType, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType) -{ - assert(varTypeIsFloating(baseType)); - switch (moveType) - { - case SMT_PreserveUpper: - GetEmitter()->emitIns_SIMD_R_R_R(ins_Store(baseType), emitTypeSize(baseType), targetReg, targetReg, srcReg); - break; - - case SMT_ZeroInitUpper: - if (compiler->canUseVexEncoding()) - { - // insertps is a 128-bit only instruction, and clears the upper 128 bits, which is what we want. - // The insertpsImm selects which fields are copied and zero'd of the lower 128 bits, so we choose - // to zero all but the lower bits. - unsigned int insertpsImm = - (INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3)); - assert((insertpsImm >= 0) && (insertpsImm <= 255)); - inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, (int8_t)insertpsImm); - } - else - { - if (srcReg == targetReg) - { - // There is no guarantee that upper bits of op1Reg are zero. - // We achieve this by using left logical shift 12-bytes and right logical shift 12 bytes. 
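The removed non-VEX path above zeroes the upper 96 bits of an xmm register by byte-shifting the whole register left and then right by 12. The same trick, sketched with the public Vector128 intrinsics (this sketch assumes an SSE2-capable x86 host; the class and method names are mine):

```csharp
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class ZeroUpperExample
{
    // pslldq xmm, 12 followed by psrldq xmm, 12: only the low 4 bytes (one
    // float) survive the round trip, so lanes 1..3 end up zero.
    static Vector128<float> ZeroUpper96(Vector128<float> v)
    {
        Vector128<byte> b = v.AsByte();
        b = Sse2.ShiftLeftLogical128BitLane(b, 12);
        b = Sse2.ShiftRightLogical128BitLane(b, 12);
        return b.AsSingle();
    }

    static void Main()
    {
        if (!Sse2.IsSupported) return;
        var v = Vector128.Create(1f, 2f, 3f, 4f);
        Console.WriteLine(ZeroUpper96(v)); // <1, 0, 0, 0>
    }
}
```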
- instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); - GetEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); - ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); - GetEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); - } - else - { - genSIMDZero(targetType, TYP_FLOAT, targetReg); - inst_Mov(baseType, targetReg, srcReg, /* canSkip */ false); - } - } - break; - - case SMT_ZeroInitUpper_SrcHasUpperZeros: - inst_Mov(baseType, targetReg, srcReg, /* canSkip */ true); - break; - - default: - unreached(); - } -} - -void CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg) -{ - // We just use `INS_xorps` since `genSIMDZero` is used for both `System.Numerics.Vectors` and - // HardwareIntrinsics. Modern CPUs handle this specially in the renamer and it never hits the - // execution pipeline, additionally `INS_xorps` is always available (when using either the - // legacy or VEX encoding). - inst_RV_RV(INS_xorps, targetReg, targetReg, targetType, emitActualTypeSize(targetType)); -} - -//------------------------------------------------------------------------------------------- -// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes -// a number of arguments equal to the length of the Vector. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) -{ - assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN); - - // Right now this intrinsic is supported only on TYP_FLOAT vectors - var_types baseType = simdNode->GetSimdBaseType(); - noway_assert(baseType == TYP_FLOAT); - - regNumber targetReg = simdNode->GetRegNum(); - assert(targetReg != REG_NA); - - var_types targetType = simdNode->TypeGet(); - - // Note that we cannot use targetReg before consumed all source operands. Therefore, - // Need an internal register to stitch together all the values into a single vector - // in an XMM reg. - regNumber vectorReg = simdNode->GetSingleTempReg(); - - // Zero out vectorReg if we are constructing a vector whose size is not equal to targetType vector size. - // For example in case of Vector4f we don't need to zero when using SSE2. - if (compiler->isSubRegisterSIMDType(simdNode)) - { - genSIMDZero(targetType, baseType, vectorReg); - } - - unsigned int baseTypeSize = genTypeSize(baseType); - instruction insLeftShift = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); - - // We will first consume the list items in execution (left to right) order, - // and record the registers. - regNumber operandRegs[SIMD_INTRINSIC_MAX_PARAM_COUNT]; - size_t initCount = simdNode->GetOperandCount(); - for (size_t i = 1; i <= initCount; i++) - { - GenTree* operand = simdNode->Op(i); - assert(operand->TypeIs(baseType)); - assert(!operand->isContained()); - - operandRegs[i - 1] = genConsumeReg(operand); - } - - unsigned offset = 0; - for (unsigned i = 0; i < initCount; i++) - { - // We will now construct the vector from the list items in reverse order. - // This allows us to efficiently stitch together a vector as follows: - // vectorReg = (vectorReg << offset) - // VectorReg[0] = listItemReg - // Use genSIMDScalarMove with SMT_PreserveUpper in order to ensure that the upper - // bits of vectorReg are not modified. 
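The stitching loop being deleted builds the vector back to front: shift the accumulator left by one element, then overwrite lane 0 while preserving the upper lanes (the SMT_PreserveUpper move, i.e. movss reg, reg). A minimal sketch of the same algorithm using public intrinsics; the managed framing and names are mine, not the JIT's:

```csharp
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class InitNStitchExample
{
    // Builds <x, y, z, w> the way genSIMDIntrinsicInitN did: start from the
    // last element, then for each earlier one shift the accumulator left by
    // one 4-byte lane and write the new value into lane 0, leaving the other
    // lanes untouched.
    static Vector128<float> Stitch(float x, float y, float z, float w)
    {
        Vector128<float> acc = Vector128.CreateScalar(w);

        foreach (float value in new[] { z, y, x })
        {
            acc = Sse2.ShiftLeftLogical128BitLane(acc.AsByte(), 4).AsSingle();
            // movss xmm, xmm: copy lane 0, preserve upper lanes (SMT_PreserveUpper).
            acc = Sse.MoveScalar(acc, Vector128.CreateScalarUnsafe(value));
        }

        return acc;
    }

    static void Main()
    {
        if (!Sse2.IsSupported) return;
        Console.WriteLine(Stitch(1f, 2f, 3f, 4f)); // <1, 2, 3, 4>
    }
}
```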
- - regNumber operandReg = operandRegs[initCount - i - 1]; - if (offset != 0) - { - assert((baseTypeSize >= 0) && (baseTypeSize <= 255)); - GetEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, (int8_t)baseTypeSize); - } - genSIMDScalarMove(targetType, baseType, vectorReg, operandReg, SMT_PreserveUpper); - - offset += baseTypeSize; - } - - noway_assert(offset == simdNode->GetSimdSize()); - - // Load the initialized value. - inst_Mov(targetType, targetReg, vectorReg, /* canSkip */ true); - genProduceReg(simdNode); -} - -//-------------------------------------------------------------------------------- -// genSIMDExtractUpperHalf: Generate code to extract the upper half of a SIMD register -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Notes: -// This is used for the WidenHi intrinsic to extract the upper half. -// On SSE*, this is 8 bytes, and on AVX2 it is 16 bytes. -// -void CodeGen::genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, regNumber tgtReg) -{ - var_types simdType = simdNode->TypeGet(); - emitAttr emitSize = emitActualTypeSize(simdType); - if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) - { - instruction extractIns = varTypeIsFloating(simdNode->GetSimdBaseType()) ? INS_vextractf128 : INS_vextracti128; - GetEmitter()->emitIns_R_R_I(extractIns, EA_32BYTE, tgtReg, srcReg, 0x01); - } - else - { - instruction shiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); - inst_Mov(simdType, tgtReg, srcReg, /* canSkip */ true); - GetEmitter()->emitIns_R_I(shiftIns, emitSize, tgtReg, 8); - } -} - //----------------------------------------------------------------------------- // genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory. // Since Vector3 is not a hardware supported write size, it is performed @@ -654,10 +394,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) switch (simdNode->GetSIMDIntrinsicId()) { - case SIMDIntrinsicInitN: - genSIMDIntrinsicInitN(simdNode); - break; - case SIMDIntrinsicUpperSave: genSIMDIntrinsicUpperSave(simdNode); break; diff --git a/src/coreclr/jit/simdintrinsiclist.h b/src/coreclr/jit/simdintrinsiclist.h index 54654645c1fa2..74beb2db3d8ca 100644 --- a/src/coreclr/jit/simdintrinsiclist.h +++ b/src/coreclr/jit/simdintrinsiclist.h @@ -38,29 +38,6 @@ ***************************************************************************************************************************************************************************************************************************/ SIMD_INTRINSIC(nullptr, false, None, "None", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -// .ctor call or newobj -// This form takes the object plus an array of the base (element) type: -SIMD_INTRINSIC(".ctor", true, InitArray, "initArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -// This form takes the object, an array of the base (element) type, and an index into the array: -SIMD_INTRINSIC(".ctor", true, InitArrayX, "initArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -// This form takes the object, and N values of the base (element) type. 
The actual number of arguments depends upon the Vector size, which must be a fixed type such as Vector2f/3f/4f
-// Right now this intrinsic is supported only on fixed float vectors and hence the supported base types lists only TYP_FLOAT.
-// This is currently the intrinsic that has the largest maximum number of operands - if we add new fixed vector types
-// with more than 4 elements, the above SIMD_INTRINSIC_MAX_PARAM_COUNT will have to change.
-SIMD_INTRINSIC(".ctor", true, InitN, "initN", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
-// This form takes the object, a smaller fixed vector, and one or two additional arguments of the base type, e.g. Vector3 V = new Vector3(V2, x); where V2 is a Vector2, and x is a float.
-SIMD_INTRINSIC(".ctor", true, InitFixed, "initFixed", TYP_VOID, 3, {TYP_BYREF, TYP_STRUCT, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
-
-// Copy vector to an array
-SIMD_INTRINSIC("CopyTo", true, CopyToArray, "CopyToArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
-SIMD_INTRINSIC("CopyTo", true, CopyToArrayX, "CopyToArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
-
-#ifdef TARGET_XARCH
-// Internal, logical shift operations that shift the entire vector register instead of individual elements of the vector.
-SIMD_INTRINSIC("ShiftLeftInternal", false, ShiftLeftInternal, "<< Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
-SIMD_INTRINSIC("ShiftRightInternal", false, ShiftRightInternal, ">> Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
-#endif // TARGET_XARCH
-
 // Internal intrinsics for saving & restoring the upper half of a vector register
 SIMD_INTRINSIC("UpperSave", false, UpperSave, "UpperSave Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
 SIMD_INTRINSIC("UpperRestore", false, UpperRestore, "UpperRestore Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index e82d187d5e889..b6c74e5fcc679 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -3690,6 +3690,7 @@ ValueNum ValueNumStore::EvalBitCastForConstantArgs(var_types dstType, ValueNum arg0VN)
     target_size_t nuint   = 0;
     float         float32 = 0;
     double        float64 = 0;
+    simd8_t       simd8   = {};
     unsigned char bytes[8] = {};

     switch (srcType)
     {
@@ -3719,6 +3720,12 @@ ValueNum ValueNumStore::EvalBitCastForConstantArgs(var_types dstType, ValueNum arg0VN)
             float64 = ConstantValue<double>(arg0VN);
             memcpy(bytes, &float64, sizeof(float64));
             break;
+#if defined(FEATURE_SIMD)
+        case TYP_SIMD8:
+            simd8 = ConstantValue<simd8_t>(arg0VN);
+            memcpy(bytes, &simd8, sizeof(simd8));
+            break;
+#endif // FEATURE_SIMD
         default:
             unreached();
     }
@@ -3759,6 +3766,11 @@ ValueNum ValueNumStore::EvalBitCastForConstantArgs(var_types dstType, ValueNum arg0VN)
         case TYP_DOUBLE:
             memcpy(&float64, bytes, sizeof(float64));
             return VNForDoubleCon(float64);
+#if defined(FEATURE_SIMD)
+        case TYP_SIMD8:
+            memcpy(&simd8, bytes, sizeof(simd8));
+            return VNForSimd8Con(simd8);
+#endif // FEATURE_SIMD
         default:
             unreached();
     }
@@ -9583,56 +9595,7 @@ void Compiler::fgValueNumberSimd(GenTreeSIMD* tree)
     ValueNumPair op1Xvnp;
     vnStore->VNPUnpackExc(tree->Op(1)->gtVNPair, &op1vnp, &op1Xvnp);

-    ValueNum addrVN       = ValueNumStore::NoVN;
-    bool     isMemoryLoad = tree->OperIsMemoryLoad();
-
-    if (isMemoryLoad)
-    {
-        // Currently the only SIMD operation with MemoryLoad semantics is SIMDIntrinsicInitArray
-        // and it has to be handled specially since it has an optional op2
-        //
-        assert(tree->GetSIMDIntrinsicId() == SIMDIntrinsicInitArray);
-
-        // rationalize rewrites this as an explicit load with op1 as the base address
-        assert(tree->OperIsImplicitIndir());
-
-        ValueNumPair op2vnp;
-        if (tree->GetOperandCount() != 2)
-        {
-            // No op2 means that we have an impicit index of zero
-            op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT));
-
-            excSetPair = op1Xvnp;
-        }
-        else // We have an explicit index in op2
-        {
-            ValueNumPair op2Xvnp;
-            vnStore->VNPUnpackExc(tree->Op(2)->gtVNPair, &op2vnp, &op2Xvnp);
-
-            excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp);
-        }
-
-        assert(vnStore->VNFuncArity(simdFunc) == 2);
-        addrVN = vnStore->VNForFunc(TYP_BYREF, simdFunc, op1vnp.GetLiberal(), op2vnp.GetLiberal());
-
-#ifdef DEBUG
-        if (verbose)
-        {
-            printf("Treating GT_SIMD %s as a ByrefExposed load , addrVN is ",
-                   simdIntrinsicNames[tree->GetSIMDIntrinsicId()]);
-            vnPrint(addrVN, 0);
-        }
-#endif // DEBUG
-
-        // The address could point anywhere, so it is an ByrefExposed load.
-        //
-        ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN);
-        tree->gtVNPair.SetLiberal(loadVN);
-        tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
-        tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair);
-        fgValueNumberAddExceptionSetForIndirection(tree, tree->Op(1));
-        return;
-    }
+    ValueNum addrVN = ValueNumStore::NoVN;

     if (tree->GetOperandCount() == 1)
     {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs
index 90e2aa683ca7b..bb6d235cbffbe 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs
@@ -565,11 +565,17 @@ public static Vector2 TransformNormal(Vector2 normal, Matrix4x4 matrix)
         /// <exception cref="System.NullReferenceException"><paramref name="array" /> is <see langword="null" />.</exception>
         /// <exception cref="System.ArgumentException">The number of elements in the current instance is greater than in the array.</exception>
         /// <exception cref="System.RankException"><paramref name="array" /> is multidimensional.</exception>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(float[] array)
         {
-            CopyTo(array, 0);
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
+
+            if (array.Length < Count)
+            {
+                ThrowHelper.ThrowArgumentException_DestinationTooShort();
+            }
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref array[0]), this);
         }
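Observable behavior of the rewritten array overload, as a usage sketch (the class name is mine; note that the NullReferenceException-on-null contract is preserved deliberately, per the comment in the change above):

```csharp
using System;
using System.Numerics;

class CopyToArrayExample
{
    static void Main()
    {
        var v = new Vector2(1f, 2f);

        float[] ok = new float[2];
        v.CopyTo(ok); // now a single unaligned 8-byte store
        Console.WriteLine(string.Join(", ", ok));

        try { v.CopyTo(new float[1]); } // destination shorter than the vector
        catch (ArgumentException) { Console.WriteLine("destination too short"); }

        try { v.CopyTo(null!); } // still NullReferenceException, not ArgumentNullException
        catch (NullReferenceException) { Console.WriteLine("null array"); }
    }
}
```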
-        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(float[] array, int index)
         {
-            if (array is null)
-            {
-                ThrowHelper.ThrowNullReferenceException();
-            }
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons

-            if ((index < 0) || (index >= array.Length))
+            if ((uint)index >= (uint)array.Length)
             {
                 ThrowHelper.ThrowStartIndexArgumentOutOfRange_ArgumentOutOfRange_IndexMustBeLess();
             }

-            if ((array.Length - index) < 2)
+            if ((array.Length - index) < Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            array[index] = X;
-            array[index + 1] = Y;
+            Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref array[index]), this);
         }

         /// <summary>Copies the vector to the given <see cref="Span{Single}" />. The length of the destination span must be at least 2.</summary>
         /// <param name="destination">The destination span which the values are copied into.</param>
         /// <exception cref="System.ArgumentException">If number of elements in source vector is greater than those available in destination span.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(Span<float> destination)
         {
-            if (destination.Length < 2)
+            if (destination.Length < Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }
@@ -620,15 +623,15 @@ public static Vector2 TransformNormal(Vector2 normal, Matrix4x4 matrix)
         /// <summary>Attempts to copy the vector to the given <see cref="Span{Single}" />. The length of the destination span must be at least 2.</summary>
         /// <param name="destination">The destination span which the values are copied into.</param>
         /// <returns><see langword="true" /> if the source vector was successfully copied to <paramref name="destination" />. <see langword="false" /> if <paramref name="destination" /> is not large enough to hold the source vector.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly bool TryCopyTo(Span<float> destination)
         {
-            if (destination.Length < 2)
+            if (destination.Length < Count)
             {
                 return false;
             }

             Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref MemoryMarshal.GetReference(destination)), this);
-
             return true;
         }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs
index e800227711f59..e7aa69589b29f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs
@@ -587,11 +587,17 @@ public static Vector3 TransformNormal(Vector3 normal, Matrix4x4 matrix)
         /// <exception cref="System.NullReferenceException"><paramref name="array" /> is <see langword="null" />.</exception>
         /// <exception cref="System.ArgumentException">The number of elements in the current instance is greater than in the array.</exception>
         /// <exception cref="System.RankException"><paramref name="array" /> is multidimensional.</exception>
-        [Intrinsic]
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(float[] array)
         {
-            CopyTo(array, 0);
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
+
+            if (array.Length < Count)
+            {
+                ThrowHelper.ThrowArgumentException_DestinationTooShort();
+            }
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref array[0]), this);
         }

         /// <summary>Copies the elements of the vector to a specified array starting at a specified index position.</summary>
@@ -604,36 +610,31 @@ public static Vector3 TransformNormal(Vector3 normal, Matrix4x4 matrix)
         /// -or-
         /// <paramref name="index" /> is greater than or equal to the array length.</exception>
         /// <exception cref="System.RankException"><paramref name="array" /> is multidimensional.</exception>
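Aside (not part of the patch): a hedged sketch of the observable behavior of the reworked `Vector2.CopyTo` validation. The cast to `uint` folds the old `index < 0` and `index >= array.Length` tests into a single comparison, because a negative index wraps to a huge unsigned value:

```csharp
using System;
using System.Numerics;

class CopyToBehavior
{
    static void Main()
    {
        Vector2 v = new Vector2(1.0f, 2.0f);

        float[] dest = new float[2];
        v.CopyTo(dest);                        // one length check, then one unaligned 8-byte write
        Console.WriteLine(string.Join(", ", dest));

        try { v.CopyTo(new float[4], -1); }    // (uint)(-1) >= 4u, so the single check rejects it
        catch (ArgumentOutOfRangeException) { Console.WriteLine("negative index rejected"); }

        try { v.CopyTo(new float[1]); }        // array shorter than the vector's element count
        catch (ArgumentException) { Console.WriteLine("destination too short"); }

        try { v.CopyTo((float[])null!); }      // no explicit null check: array.Length dereference throws
        catch (NullReferenceException) { Console.WriteLine("historical NullReferenceException preserved"); }
    }
}
```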
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(float[] array, int index)
         {
-            if (array is null)
-            {
-                ThrowHelper.ThrowNullReferenceException();
-            }
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons

-            if ((index < 0) || (index >= array.Length))
+            if ((uint)index >= (uint)array.Length)
             {
                 ThrowHelper.ThrowStartIndexArgumentOutOfRange_ArgumentOutOfRange_IndexMustBeLess();
             }

-            if ((array.Length - index) < 3)
+            if ((array.Length - index) < Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            array[index] = X;
-            array[index + 1] = Y;
-            array[index + 2] = Z;
+            Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref array[index]), this);
         }

         /// <summary>Copies the vector to the given <see cref="Span{Single}" />. The length of the destination span must be at least 3.</summary>
         /// <param name="destination">The destination span which the values are copied into.</param>
         /// <exception cref="System.ArgumentException">If number of elements in source vector is greater than those available in destination span.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(Span<float> destination)
         {
-            if (destination.Length < 3)
+            if (destination.Length < Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }
@@ -644,15 +645,15 @@ public static Vector3 TransformNormal(Vector3 normal, Matrix4x4 matrix)
         /// <summary>Attempts to copy the vector to the given <see cref="Span{Single}" />. The length of the destination span must be at least 3.</summary>
         /// <param name="destination">The destination span which the values are copied into.</param>
         /// <returns><see langword="true" /> if the source vector was successfully copied to <paramref name="destination" />. <see langword="false" /> if <paramref name="destination" /> is not large enough to hold the source vector.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly bool TryCopyTo(Span<float> destination)
         {
-            if (destination.Length < 3)
+            if (destination.Length < Count)
             {
                 return false;
             }

             Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref MemoryMarshal.GetReference(destination)), this);
-
             return true;
         }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs
index bb12c1d1cab52..dbb62e749bb36 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs
@@ -672,11 +672,17 @@ public static Vector4 Transform(Vector4 value, Quaternion rotation)
         /// <exception cref="System.NullReferenceException"><paramref name="array" /> is <see langword="null" />.</exception>
         /// <exception cref="System.ArgumentException">The number of elements in the current instance is greater than in the array.</exception>
         /// <exception cref="System.RankException"><paramref name="array" /> is multidimensional.</exception>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(float[] array)
         {
-            CopyTo(array, 0);
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
+
+            if (array.Length < Count)
+            {
+                ThrowHelper.ThrowArgumentException_DestinationTooShort();
+            }
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref array[0]), this);
         }

         /// <summary>Copies the elements of the vector to a specified array starting at a specified index position.</summary>
@@ -689,37 +695,31 @@ public static Vector4 Transform(Vector4 value, Quaternion rotation)
         /// -or-
         /// <paramref name="index" /> is greater than or equal to the array length.</exception>
         /// <exception cref="System.RankException"><paramref name="array" /> is multidimensional.</exception>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(float[] array, int index)
         {
-            if (array is null)
-            {
-                ThrowHelper.ThrowNullReferenceException();
-            }
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons

-            if ((index < 0) || (index >= array.Length))
+            if ((uint)index >= (uint)array.Length)
             {
                 ThrowHelper.ThrowStartIndexArgumentOutOfRange_ArgumentOutOfRange_IndexMustBeLess();
             }

-            if ((array.Length - index) < 4)
+            if ((array.Length - index) < Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            array[index] = X;
-            array[index + 1] = Y;
-            array[index + 2] = Z;
-            array[index + 3] = W;
+            Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref array[index]), this);
         }

         /// <summary>Copies the vector to the given <see cref="Span{Single}" />. The length of the destination span must be at least 4.</summary>
         /// <param name="destination">The destination span which the values are copied into.</param>
         /// <exception cref="System.ArgumentException">If number of elements in source vector is greater than those available in destination span.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly void CopyTo(Span<float> destination)
         {
-            if (destination.Length < 4)
+            if (destination.Length < Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }
@@ -730,15 +730,15 @@ public static Vector4 Transform(Vector4 value, Quaternion rotation)
         /// <summary>Attempts to copy the vector to the given <see cref="Span{Single}" />. The length of the destination span must be at least 4.</summary>
         /// <param name="destination">The destination span which the values are copied into.</param>
         /// <returns><see langword="true" /> if the source vector was successfully copied to <paramref name="destination" />. <see langword="false" /> if <paramref name="destination" /> is not large enough to hold the source vector.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly bool TryCopyTo(Span<float> destination)
         {
-            if (destination.Length < 4)
+            if (destination.Length < Count)
             {
                 return false;
             }

             Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref MemoryMarshal.GetReference(destination)), this);
-
             return true;
         }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs
index 55cd24908a060..cd102ac3a720c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs
@@ -9,6 +9,9 @@
 using System.Runtime.Intrinsics;
 using System.Text;

+// We use sizeof(Vector<T>) in a few places and want to ignore the warning that it could be a managed type
+#pragma warning disable 8500
+
 namespace System.Numerics
 {
     /* Note: The following patterns are used throughout the code here and are described here
@@ -41,7 +44,6 @@ namespace System.Numerics
         [Intrinsic]
         public Vector(T value)
         {
-            ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();
             Unsafe.SkipInit(out this);

             for (int index = 0; index < Count; index++)
@@ -55,9 +57,17 @@ public Vector(T value)
         /// <returns>A new <see cref="Vector{T}" /> with its elements set to the first <see cref="Count" /> elements from <paramref name="values" />.</returns>
         /// <exception cref="System.NullReferenceException"><paramref name="values" /> is null.</exception>
         /// <exception cref="System.ArgumentOutOfRangeException">The length of <paramref name="values" /> is less than <see cref="Count" />.</exception>
-        [Intrinsic]
-        public Vector(T[] values) : this(values, 0)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public Vector(T[] values)
         {
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
+
+            if (values.Length < Count)
+            {
+                ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
+            }
+
+            this = Unsafe.ReadUnaligned<Vector<T>>(ref Unsafe.As<T, byte>(ref values[0]));
         }

         /// <summary>Creates a new <see cref="Vector{T}" /> from a given array.</summary>
@@ -66,15 +76,10 @@ public Vector(T[] values) : this(values, 0)
         /// <returns>A new <see cref="Vector{T}" /> with its elements set to the first <see cref="Count" /> elements from <paramref name="values" />.</returns>
         /// <exception cref="System.NullReferenceException"><paramref name="values" /> is null.</exception>
         /// <exception cref="System.ArgumentOutOfRangeException">The length of <paramref name="values" />, starting from <paramref name="index" />, is less than <see cref="Count" />.</exception>
-        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public Vector(T[] values, int index)
         {
-            ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();
-
-            if (values is null)
-            {
-                ThrowHelper.ThrowNullReferenceException();
-            }
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons

             if ((index < 0) || ((values.Length - index) < Count))
             {
@@ -91,7 +96,7 @@ public Vector(T[] values, int index)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public Vector(ReadOnlySpan<T> values)
         {
-            ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons

             if (values.Length < Count)
             {
@@ -106,11 +111,12 @@ public Vector(ReadOnlySpan<T> values)
         /// <returns>A new <see cref="Vector{T}" /> with its elements set to the first sizeof(<see cref="Vector{T}" />) elements from <paramref name="values" />.</returns>
         /// <exception cref="System.ArgumentOutOfRangeException">The length of <paramref name="values" /> is less than sizeof(<see cref="Vector{T}" />).</exception>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public Vector(ReadOnlySpan<byte> values)
+        public unsafe Vector(ReadOnlySpan<byte> values)
         {
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
             ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();

-            if (values.Length < Vector<byte>.Count)
+            if (values.Length < sizeof(Vector<T>))
             {
                 ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values);
             }
@@ -149,9 +155,7 @@ public static unsafe int Count
             get
             {
                 ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();
-#pragma warning disable 8500 // sizeof of managed types
                 return sizeof(Vector<T>) / sizeof(T);
-#pragma warning restore 8500
             }
         }
@@ -652,8 +656,18 @@ internal string DisplayString
         /// <param name="destination">The array to which the current instance is copied.</param>
         /// <exception cref="System.NullReferenceException"><paramref name="destination" /> is null.</exception>
         /// <exception cref="System.ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Count" />.</exception>
-        [Intrinsic]
-        public void CopyTo(T[] destination) => CopyTo(destination, 0);
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void CopyTo(T[] destination)
+        {
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
+
+            if (destination.Length < Count)
+            {
+                ThrowHelper.ThrowArgumentException_DestinationTooShort();
+            }
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[0]), this);
+        }

         /// <summary>Copies a <see cref="Vector{T}" /> to a given array starting at the specified index.</summary>
         /// <param name="destination">The array to which the current instance is copied.</param>
         /// <exception cref="System.NullReferenceException"><paramref name="destination" /> is null.</exception>
         /// <exception cref="System.ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Count" />.</exception>
         /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="startIndex" /> is negative or greater than the length of <paramref name="destination" />.</exception>
-        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void CopyTo(T[] destination, int startIndex)
         {
-            ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();
-
-            if (destination is null)
-            {
-                ThrowHelper.ThrowNullReferenceException();
-            }
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons

             if ((uint)startIndex >= (uint)destination.Length)
             {
@@ -681,37 +690,37 @@ public void CopyTo(T[] destination, int startIndex)
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            Unsafe.WriteUnaligned<Vector<T>>(ref Unsafe.As<T, byte>(ref destination[startIndex]), this);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[startIndex]), this);
         }

         /// <summary>Copies a <see cref="Vector{T}" /> to a given span.</summary>
         /// <param name="destination">The span to which the current instance is copied.</param>
         /// <exception cref="System.ArgumentException">The length of <paramref name="destination" /> is less than sizeof(<see cref="Vector{T}" />).</exception>
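Aside (not part of the patch): the byte-span constructor above now validates against `sizeof(Vector<T>)`, the vector's size in bytes, rather than an element count. An illustrative sketch of the distinction, relying on the fact that `Vector<byte>.Count` equals that byte size:

```csharp
using System;
using System.Numerics;

class ByteSpanCtor
{
    static void Main()
    {
        // sizeof(Vector<T>) is the register width in bytes and is the same for every T;
        // Vector<byte>.Count happens to equal that byte size, so it sizes the buffer here.
        byte[] bytes = new byte[Vector<byte>.Count];
        for (int i = 0; i < bytes.Length; i++)
        {
            bytes[i] = (byte)i;
        }

        // Reads exactly sizeof(Vector<int>) bytes and reinterprets them as ints.
        Vector<int> v = new Vector<int>((ReadOnlySpan<byte>)bytes);
        Console.WriteLine(v);
    }
}
```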
-        public void CopyTo(Span<byte> destination)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public unsafe void CopyTo(Span<byte> destination)
         {
             ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();

-            if ((uint)destination.Length < (uint)Vector<byte>.Count)
+            if (destination.Length < sizeof(Vector<T>))
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            Unsafe.WriteUnaligned<Vector<T>>(ref MemoryMarshal.GetReference(destination), this);
+            Unsafe.WriteUnaligned(ref MemoryMarshal.GetReference(destination), this);
         }

         /// <summary>Copies a <see cref="Vector{T}" /> to a given span.</summary>
         /// <param name="destination">The span to which the current instance is copied.</param>
         /// <exception cref="System.ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Count" />.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public void CopyTo(Span<T> destination)
         {
-            ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();
-
-            if ((uint)destination.Length < (uint)Count)
+            if (destination.Length < Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            Unsafe.WriteUnaligned<Vector<T>>(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), this);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), this);
         }

         /// <summary>Returns a boolean indicating whether the given Object is equal to this vector instance.</summary>
@@ -809,33 +818,35 @@ public string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] strin
         /// <summary>Tries to copy a <see cref="Vector{T}" /> to a given span.</summary>
         /// <param name="destination">The span to which the current instance is copied.</param>
         /// <returns>true if the current instance was successfully copied to <paramref name="destination" />; otherwise, false if the length of <paramref name="destination" /> is less than sizeof(<see cref="Vector{T}" />).</returns>
-        public bool TryCopyTo(Span<byte> destination)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public unsafe bool TryCopyTo(Span<byte> destination)
         {
             ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();

-            if ((uint)destination.Length < (uint)Vector<byte>.Count)
+            if (destination.Length < sizeof(Vector<T>))
             {
                 return false;
             }

-            Unsafe.WriteUnaligned<Vector<T>>(ref MemoryMarshal.GetReference(destination), this);
+            Unsafe.WriteUnaligned(ref MemoryMarshal.GetReference(destination), this);
             return true;
         }

         /// <summary>Tries to copy a <see cref="Vector{T}" /> to a given span.</summary>
         /// <param name="destination">The span to which the current instance is copied.</param>
         /// <returns>true if the current instance was successfully copied to <paramref name="destination" />; otherwise, false if the length of <paramref name="destination" /> is less than <see cref="Count" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public bool TryCopyTo(Span<T> destination)
         {
-            ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType();
-
-            if ((uint)destination.Length < (uint)Count)
+            if (destination.Length < Count)
             {
                 return false;
             }

-            Unsafe.WriteUnaligned<Vector<T>>(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), this);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), this);
             return true;
         }
     }
 }
+
+#pragma warning restore CS8500
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index a1d433731dffe..88db7bd90f960 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -599,6 +599,7 @@ public static unsafe Vector128<ulong> ConvertToUInt64(Vector128<double> vector)
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector128{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="destination" /> is null.</exception>
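Aside (not part of the patch): the span overloads keep the throwing `CopyTo` / non-throwing `TryCopyTo` split; a small usage sketch:

```csharp
using System;
using System.Numerics;

class TryCopyToUsage
{
    static void Main()
    {
        Vector<int> v = new Vector<int>(42);

        Span<int> exact = stackalloc int[Vector<int>.Count];
        v.CopyTo(exact);                       // throws ArgumentException if the span is too short

        Span<int> tooShort = stackalloc int[Vector<int>.Count - 1];
        if (!v.TryCopyTo(tooShort))            // reports failure instead of throwing
        {
            Console.WriteLine("destination too short, nothing written");
        }
    }
}
```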
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void CopyTo<T>(this Vector128<T> vector, T[] destination)
             where T : struct
         {
@@ -609,8 +610,7 @@ public static void CopyTo<T>(this Vector128<T> vector, T[] destination)
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[0]), vector);
         }

         /// <summary>Copies a <see cref="Vector128{T}" /> to a given array starting at the specified index.</summary>
@@ -622,6 +622,7 @@ public static void CopyTo<T>(this Vector128<T> vector, T[] destination)
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="startIndex" /> is negative or greater than the length of <paramref name="destination" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="destination" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe void CopyTo<T>(this Vector128<T> vector, T[] destination, int startIndex)
             where T : struct
         {
@@ -637,8 +638,7 @@ public static unsafe void CopyTo<T>(this Vector128<T> vector, T[] destination, i
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(destination));
-            Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, startIndex), vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[startIndex]), vector);
         }

         /// <summary>Copies a <see cref="Vector128{T}" /> to a given span.</summary>
@@ -647,16 +647,16 @@ public static unsafe void CopyTo<T>(this Vector128<T> vector, T[] destination, i
         /// <param name="destination">The span to which the <paramref name="vector" /> is copied.</param>
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector128{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void CopyTo<T>(this Vector128<T> vector, Span<T> destination)
             where T : struct
         {
-            if ((uint)destination.Length < (uint)Vector128<T>.Count)
+            if (destination.Length < Vector128<T>.Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
         }

         /// <summary>Creates a new <see cref="Vector128{T}" /> instance with all elements initialized to the specified value.</summary>
@@ -779,6 +779,7 @@ public static unsafe Vector128<T> Create<T>(T value)
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" /> is less than <see cref="Vector128{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="values" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector128<T> Create<T>(T[] values)
             where T : struct
         {
@@ -789,8 +790,7 @@ public static Vector128<T> Create<T>(T[] values)
                 ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(values));
-            return Unsafe.ReadUnaligned<Vector128<T>>(ref address);
+            return Unsafe.ReadUnaligned<Vector128<T>>(ref Unsafe.As<T, byte>(ref values[0]));
         }

         /// <summary>Creates a new <see cref="Vector128{T}" /> from a given array.</summary>
@@ -801,6 +801,7 @@ public static Vector128<T> Create<T>(T[] values)
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" />, starting from <paramref name="index" />, is less than <see cref="Vector128{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="values" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector128<T> Create<T>(T[] values, int index)
             where T : struct
         {
@@ -811,8 +812,7 @@ public static Vector128<T> Create<T>(T[] values, int index)
                 ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(values));
-            return Unsafe.ReadUnaligned<Vector128<T>>(ref Unsafe.Add(ref address, index));
+            return Unsafe.ReadUnaligned<Vector128<T>>(ref Unsafe.As<T, byte>(ref values[index]));
         }

         /// <summary>Creates a new <see cref="Vector128{T}" /> from a given readonly span.</summary>
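Aside (not part of the patch): on the `MemoryMarshal.GetArrayDataReference` to `ref destination[0]` change above — once the preceding length check has run, indexing element 0 (or `startIndex`) is known to be in range, so both forms reach the same memory. A sketch of the equivalence, illustrative only:

```csharp
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

class RefElementVsArrayData
{
    static void Main()
    {
        int[] values = { 1, 2, 3, 4 };

        // New style: index the first element; the earlier Length check guards the access.
        Vector128<int> a = Unsafe.ReadUnaligned<Vector128<int>>(
            ref Unsafe.As<int, byte>(ref values[0]));

        // Old style: take the array data reference directly, with no bounds check at all.
        Vector128<int> b = Unsafe.ReadUnaligned<Vector128<int>>(
            ref Unsafe.As<int, byte>(ref MemoryMarshal.GetArrayDataReference(values)));

        Console.WriteLine(a.Equals(b)); // True
    }
}
```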
@@ -830,8 +830,7 @@ public static Vector128<T> Create<T>(ReadOnlySpan<T> values)
                 ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values);
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values));
-            return Unsafe.ReadUnaligned<Vector128<T>>(ref address);
+            return Unsafe.ReadUnaligned<Vector128<T>>(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values)));
         }

         /// <summary>Creates a new <see cref="Vector128{T}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -2774,16 +2773,16 @@ public static unsafe Vector256<T> ToVector256Unsafe<T>(this Vector128<T> vector)
         /// <param name="destination">The span to which <paramref name="vector" /> is copied.</param>
         /// <returns>true if <paramref name="vector" /> was successfully copied to <paramref name="destination" />; otherwise, false if the length of <paramref name="destination" /> is less than <see cref="Vector128{T}.Count" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool TryCopyTo<T>(this Vector128<T> vector, Span<T> destination)
             where T : struct
         {
-            if ((uint)destination.Length < (uint)Vector128<T>.Count)
+            if (destination.Length < Vector128<T>.Count)
             {
                 return false;
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
             return true;
         }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index 552236ddc4e35..14b8e5bdfd1b0 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -525,6 +525,7 @@ public static Vector256<ulong> ConvertToUInt64(Vector256<double> vector)
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector256{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="destination" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void CopyTo<T>(this Vector256<T> vector, T[] destination)
             where T : struct
         {
@@ -535,8 +536,7 @@ public static void CopyTo<T>(this Vector256<T> vector, T[] destination)
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[0]), vector);
         }

         /// <summary>Copies a <see cref="Vector256{T}" /> to a given array starting at the specified index.</summary>
@@ -548,6 +548,7 @@ public static void CopyTo<T>(this Vector256<T> vector, T[] destination)
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="startIndex" /> is negative or greater than the length of <paramref name="destination" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="destination" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void CopyTo<T>(this Vector256<T> vector, T[] destination, int startIndex)
             where T : struct
         {
@@ -563,8 +564,7 @@ public static void CopyTo<T>(this Vector256<T> vector, T[] destination, int star
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(destination));
-            Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, startIndex), vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[startIndex]), vector);
         }

         /// <summary>Copies a <see cref="Vector256{T}" /> to a given span.</summary>
@@ -573,16 +573,16 @@ public static void CopyTo<T>(this Vector256<T> vector, T[] destination, int star
         /// <param name="destination">The span to which the <paramref name="vector" /> is copied.</param>
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector256{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void CopyTo<T>(this Vector256<T> vector, Span<T> destination)
             where T : struct
         {
-            if ((uint)destination.Length < (uint)Vector256<T>.Count)
+            if (destination.Length < Vector256<T>.Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
         }

         /// <summary>Creates a new <see cref="Vector256{T}" /> instance with all elements initialized to the specified value.</summary>
@@ -705,6 +705,7 @@ public static Vector256<T> Create<T>(T value)
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" /> is less than <see cref="Vector256{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="values" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Create<T>(T[] values)
             where T : struct
         {
@@ -715,8 +716,7 @@ public static Vector256<T> Create<T>(T[] values)
                 ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(values));
-            return Unsafe.ReadUnaligned<Vector256<T>>(ref address);
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.As<T, byte>(ref values[0]));
         }

         /// <summary>Creates a new <see cref="Vector256{T}" /> from a given array.</summary>
@@ -727,6 +727,7 @@ public static Vector256<T> Create<T>(T[] values)
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" />, starting from <paramref name="index" />, is less than <see cref="Vector256{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="values" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Create<T>(T[] values, int index)
             where T : struct
         {
@@ -737,8 +738,7 @@ public static Vector256<T> Create<T>(T[] values, int index)
                 ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(values));
-            return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.Add(ref address, index));
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.As<T, byte>(ref values[index]));
         }

         /// <summary>Creates a new <see cref="Vector256{T}" /> from a given readonly span.</summary>
@@ -756,8 +756,7 @@ public static Vector256<T> Create<T>(ReadOnlySpan<T> values)
                 ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values);
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values));
-            return Unsafe.ReadUnaligned<Vector256<T>>(ref address);
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values)));
         }

         /// <summary>Creates a new <see cref="Vector256{T}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -2750,16 +2749,16 @@ public static T ToScalar<T>(this Vector256<T> vector)
         /// <param name="destination">The span to which <paramref name="vector" /> is copied.</param>
         /// <returns>true if <paramref name="vector" /> was successfully copied to <paramref name="destination" />; otherwise, false if the length of <paramref name="destination" /> is less than <see cref="Vector256{T}.Count" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
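Aside (not part of the patch): the recurring `(uint)destination.Length < (uint)Count` to `destination.Length < Count` rewrite is safe because both operands are non-negative `int` values here; the unsigned-cast idiom only pays off when the left side can be negative, as with a caller-supplied index. A tiny sketch with hypothetical helper names:

```csharp
using System;

class LengthCheckForms
{
    // Equivalent whenever both operands are non-negative ints (lengths and counts are).
    static bool TooShortSigned(int length, int count) => length < count;
    static bool TooShortUnsigned(int length, int count) => (uint)length < (uint)count;

    static void Main()
    {
        Console.WriteLine(TooShortSigned(3, 4) == TooShortUnsigned(3, 4)); // True
        Console.WriteLine(TooShortSigned(8, 4) == TooShortUnsigned(8, 4)); // True
    }
}
```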
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool TryCopyTo<T>(this Vector256<T> vector, Span<T> destination)
             where T : struct
         {
-            if ((uint)destination.Length < (uint)Vector256<T>.Count)
+            if (destination.Length < Vector256<T>.Count)
             {
                 return false;
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
             return true;
         }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
index 6a897cd90364a..42028ce728b02 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
@@ -465,6 +465,7 @@ public static unsafe Vector64<ulong> ConvertToUInt64(Vector64<double> vector)
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector64{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="destination" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void CopyTo<T>(this Vector64<T> vector, T[] destination)
             where T : struct
         {
@@ -475,8 +476,7 @@ public static void CopyTo<T>(this Vector64<T> vector, T[] destination)
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[0]), vector);
         }

         /// <summary>Copies a <see cref="Vector64{T}" /> to a given array starting at the specified index.</summary>
@@ -488,6 +488,7 @@ public static void CopyTo<T>(this Vector64<T> vector, T[] destination)
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="startIndex" /> is negative or greater than the length of <paramref name="destination" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="destination" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe void CopyTo<T>(this Vector64<T> vector, T[] destination, int startIndex)
             where T : struct
         {
@@ -503,8 +504,7 @@ public static unsafe void CopyTo<T>(this Vector64<T> vector, T[] destination, in
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(destination));
-            Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, startIndex), vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[startIndex]), vector);
         }

         /// <summary>Copies a <see cref="Vector64{T}" /> to a given span.</summary>
@@ -513,16 +513,16 @@ public static unsafe void CopyTo<T>(this Vector64<T> vector, T[] destination, in
         /// <param name="destination">The span to which <paramref name="vector" /> is copied.</param>
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector64{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void CopyTo<T>(this Vector64<T> vector, Span<T> destination)
             where T : struct
         {
-            if ((uint)destination.Length < (uint)Vector64<T>.Count)
+            if (destination.Length < Vector64<T>.Count)
             {
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
         }

         /// <summary>Creates a new <see cref="Vector64{T}" /> instance with all elements initialized to the specified value.</summary>
@@ -646,6 +646,7 @@ public static unsafe Vector64<T> Create<T>(T value)
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" /> is less than <see cref="Vector64{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="values" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector64<T> Create<T>(T[] values)
             where T : struct
         {
@@ -656,8 +657,7 @@ public static Vector64<T> Create<T>(T[] values)
                 ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(values));
-            return Unsafe.ReadUnaligned<Vector64<T>>(ref address);
+            return Unsafe.ReadUnaligned<Vector64<T>>(ref Unsafe.As<T, byte>(ref values[0]));
         }

         /// <summary>Creates a new <see cref="Vector64{T}" /> from a given array.</summary>
@@ -668,6 +668,7 @@ public static Vector64<T> Create<T>(T[] values)
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" />, starting from <paramref name="index" />, is less than <see cref="Vector64{T}.Count" />.</exception>
         /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="NullReferenceException"><paramref name="values" /> is null.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector64<T> Create<T>(T[] values, int index)
             where T : struct
         {
@@ -678,8 +679,7 @@ public static Vector64<T> Create<T>(T[] values, int index)
                 ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(values));
-            return Unsafe.ReadUnaligned<Vector64<T>>(ref Unsafe.Add(ref address, index));
+            return Unsafe.ReadUnaligned<Vector64<T>>(ref Unsafe.As<T, byte>(ref values[index]));
         }

         /// <summary>Creates a new <see cref="Vector64{T}" /> from a given readonly span.</summary>
@@ -697,8 +697,7 @@ public static Vector64<T> Create<T>(ReadOnlySpan<T> values)
                 ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values);
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values));
-            return Unsafe.ReadUnaligned<Vector64<T>>(ref address);
+            return Unsafe.ReadUnaligned<Vector64<T>>(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values)));
         }

         /// <summary>Creates a new <see cref="Vector64{T}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -2428,16 +2427,16 @@ public static unsafe Vector128<T> ToVector128Unsafe<T>(this Vector64<T> vector)
         /// <param name="destination">The span to which <paramref name="vector" /> is copied.</param>
         /// <returns>true if <paramref name="vector" /> was successfully copied to <paramref name="destination" />; otherwise, false if the length of <paramref name="destination" /> is less than <see cref="Vector64{T}.Count" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool TryCopyTo<T>(this Vector64<T> vector, Span<T> destination)
             where T : struct
         {
-            if ((uint)destination.Length < (uint)Vector64<T>.Count)
+            if (destination.Length < Vector64<T>.Count)
             {
                 return false;
             }

-            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination));
-            Unsafe.WriteUnaligned(ref address, vector);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
             return true;
         }
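Aside (not part of the patch): putting the reshaped Vector64 surface together, a small end-to-end sketch of `Create`, `CopyTo`, and `TryCopyTo`:

```csharp
using System;
using System.Runtime.Intrinsics;

class Vector64Roundtrip
{
    static void Main()
    {
        int[] source = { 1, 2 };
        Vector64<int> v = Vector64.Create(source);  // length-checked unaligned read from source[0]

        Span<int> dest = stackalloc int[Vector64<int>.Count];
        v.CopyTo(dest);                             // length-checked unaligned write

        Span<int> tooShort = stackalloc int[Vector64<int>.Count - 1];
        Console.WriteLine(v.TryCopyTo(tooShort));   // False: too short, nothing thrown
    }
}
```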