Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 119 additions & 3 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2325,14 +2325,15 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
GenTreeVecCon* vecCon = tree->AsVecCon();

emitter* emit = GetEmitter();
emitAttr attr = emitTypeSize(targetType);

switch (tree->TypeGet())
{
case TYP_SIMD8:
case TYP_SIMD12:
case TYP_SIMD16:
{
emitAttr attr = emitTypeSize(targetType);

// We ignore any differences between SIMD12 and SIMD16 here if we can broadcast the value
// via mvni/movi.
const bool is8 = tree->TypeIs(TYP_SIMD8);
Expand Down Expand Up @@ -2385,6 +2386,109 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
break;
}

case TYP_SIMD:
{
simdscalable_t simdVal = vecCon->gtSimdScalableVal;
insOpts opt = emitter::optGetSveInsOpt(emitTypeSize(simdVal.gtSimdScalableBaseType));
emitAttr emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE;

// Helper that places 'constValue' into a temporary register and returns that register.
// The value is emitted as a data constant and then read back with an 'ldr' of size 'emitSize'.
// Callers in this case block pass either the index or the step of the scalable constant.
auto loadConstantHelper = [&](uint64_t constValue) -> regNumber {
// Get a temp integer register to compute long address. Use Extract so multiple calls
// (index + step) get distinct temps when LSRA reserved more than one.
regNumber addrReg = internalRegisters.Extract(tree, RBM_ALLINT);

// Emit the value as a data constant. sizeof(constValue) is passed for both size arguments,
// so the full uint64_t is always emitted regardless of 'emitSize'.
UNATIVE_OFFSET cnum =
emit->emitDataConst(&constValue, sizeof(constValue), sizeof(constValue), TYP_LONG);
// Turn the data offset into a field handle that the load below can reference.
CORINFO_FIELD_HANDLE hnd = m_compiler->eeFindJitDataOffs(cnum);

// Load the constant. The same temp is passed for both register operands
// (presumably destination and address scratch - confirm against emitIns_R_C).
emit->emitIns_R_C(INS_ldr, emitSize, addrReg, addrReg, hnd, 0);

return addrReg;
};
Comment thread
a74nh marked this conversation as resolved.

switch (vecCon->gtSimdScalableVal.gtSimdScalableKind)
{
case SimdScalableRepeated:
if (emitter::isValidSimm<8>(simdVal.gtSimdScalableIndex) ||
emitter::isValidSimm_MultipleOf<8, 256>(simdVal.gtSimdScalableIndex))
Comment on lines +2414 to +2415
Copy link

Copilot AI May 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isValidSimm takes an ssize_t but the argument is uint64_t. Converting an out-of-range uint64_t to a signed type is implementation-defined in C++, which can make the check non-portable and harder to reason about (notably for negative values represented in two’s complement). Prefer storing gtSimdScalableIndex/Step as int64_t (if these are conceptually signed immediates) or cast explicitly through int64_t/ssize_t in a way that documents the intended interpretation before calling isValidSimm.

Copilot uses AI. Check for mistakes.
{
emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, simdVal.gtSimdScalableIndex,
opt);
}
else
{
regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex);
emit->emitInsSve_R_R(INS_sve_dup, emitSize, targetReg, indexReg, opt);
}
break;

case SimdScalableSequence:
if (emitter::isValidSimm<5>(simdVal.gtSimdScalableIndex) &&
emitter::isValidSimm<5>(simdVal.gtSimdScalableStep))
{
emit->emitInsSve_R_I_I(INS_sve_index, EA_SCALABLE, targetReg,
simdVal.gtSimdScalableIndex, simdVal.gtSimdScalableStep, opt);
}
else if (emitter::isValidSimm<5>(simdVal.gtSimdScalableIndex))
{
regNumber stepReg = loadConstantHelper(simdVal.gtSimdScalableStep);
emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, stepReg,
simdVal.gtSimdScalableIndex, opt, INS_SCALABLE_OPTS_IMM_FIRST);
}
else if (emitter::isValidSimm<5>(simdVal.gtSimdScalableStep))
{
regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex);
emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, indexReg,
simdVal.gtSimdScalableStep, opt);
}
else
{
regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex);
regNumber stepReg = loadConstantHelper(simdVal.gtSimdScalableStep);
emit->emitInsSve_R_R_R(INS_sve_index, emitSize, targetReg, indexReg, stepReg, opt);
}
break;

case SimdScalableScalar:
{
// Clear the entire target register
emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, 0, opt);

// Use NEON instructions to load the constant (to avoid using predicates)

if (varTypeIsIntegral(simdVal.gtSimdScalableBaseType) &&
emitter::emitIns_valid_imm_for_mov(simdVal.gtSimdScalableIndex, emitSize))
{
emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndex);
}
else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) &&
emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0]))
{
emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndexF64[0]);
}
else if ((simdVal.gtSimdScalableBaseType == TYP_FLOAT) &&
emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF32[0]))
{
emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg,
static_cast<double>(simdVal.gtSimdScalableIndexF32[0]));
}
else
{
regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex);
emit->emitIns_R_R(INS_ins, emitSize, targetReg, indexReg, INS_OPTS_16B);
}
break;
}
Comment thread
a74nh marked this conversation as resolved.

default:
unreached();
break;
}
break;
}

default:
{
unreached();
Expand All @@ -2399,14 +2503,26 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
GenTreeMskCon* mask = tree->AsMskCon();
emitter* emit = GetEmitter();

// Try every type until a match is found

if (mask->IsZero())
{
emit->emitInsSve_R(INS_sve_pfalse, EA_SCALABLE, targetReg, INS_OPTS_SCALABLE_B);
break;
}

#if defined(DEBUG)
if (JitConfig.JitUseScalableVectorT() == 1)
{
assert(mask->gtSimdScalableMaskVal.gtSimdMaskScalableIndex == 1);

insOpts opt =
emitter::optGetSveInsOpt(emitTypeSize(mask->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType));
emit->emitIns_R_PATTERN(INS_sve_ptrue, EA_SCALABLE, targetReg, opt, SVE_PATTERN_ALL);
break;
}
#endif // DEBUG

// Fixed length vectors. Try every type until a match is found

insOpts opt = INS_OPTS_SCALABLE_B;
SveMaskPattern pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_BYTE, mask->gtSimdMaskVal);

Expand Down
15 changes: 13 additions & 2 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3236,10 +3236,21 @@ class Compiler
#if defined(FEATURE_SIMD)
GenTreeVecCon* gtNewVconNode(var_types type);
GenTreeVecCon* gtNewVconNode(var_types type, void* data);
#if defined(TARGET_ARM64)
GenTreeVecCon* gtNewSimdVconNode(var_types type, var_types baseType, SimdScalableKind kind, uint64_t index, uint64_t step = 0);

// Convenience overload: creates the node from an existing simdscalable_t by forwarding its
// base type, kind, index and step to the overload that takes them individually.
inline GenTreeVecCon* gtNewSimdVconNode(var_types type, simdscalable_t* con)
{
    const simdscalable_t& scalable = *con;

    return gtNewSimdVconNode(type, scalable.gtSimdScalableBaseType, scalable.gtSimdScalableKind,
                             scalable.gtSimdScalableIndex, scalable.gtSimdScalableStep);
}
#endif // TARGET_ARM64
#endif // FEATURE_SIMD

#if defined(FEATURE_MASKED_HW_INTRINSICS)
GenTreeMskCon* gtNewMskConNode(var_types type);
#if defined(TARGET_ARM64)
GenTreeMskCon* gtNewMskConNode(var_types type, var_types baseType, bool index);
#endif // TARGET_ARM64
#endif // FEATURE_MASKED_HW_INTRINSICS

GenTree* gtNewAllBitsSetConNode(var_types type);
Expand Down Expand Up @@ -3348,7 +3359,7 @@ class Compiler
var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize);

#if defined(TARGET_ARM64)
GenTree* gtNewSimdAllTrueMaskNode(var_types simdBaseType);
GenTree* gtNewSimdTrueMaskNode(var_types simdBaseType);
GenTree* gtNewSimdFalseMaskByteNode();
#endif

Expand Down Expand Up @@ -3916,7 +3927,7 @@ class Compiler

#if defined(FEATURE_HW_INTRINSICS)
GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree);
GenTreeMskCon* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon);
GenTree* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon);
#endif // FEATURE_HW_INTRINSICS

// Options to control behavior of gtTryRemoveBoxUpstreamEffects
Expand Down
26 changes: 18 additions & 8 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,14 @@ inline bool genExactlyOneBit(T value)
// regMaskTP overload: applies genFindLowestBit to the low half of 'value' and, when more than
// 64 registers are supported and the low half is empty, to the high half instead.
inline regMaskTP genFindLowestBit(regMaskTP value)
{
#ifdef HAS_MORE_THAN_64_REGISTERS
    // The low half takes priority; the high half is only consulted when the low half is empty.
    if (value.getLow() != RBM_NONE)
    {
        return regMaskTP(genFindLowestBit(value.getLow()));
    }
    return regMaskTP(RBM_NONE, genFindLowestBit(value.getHigh()));
#else
    return regMaskTP(genFindLowestBit(value.getLow()));
#endif
}

/*****************************************************************************
Expand All @@ -117,11 +120,18 @@ inline regMaskTP genFindLowestBit(regMaskTP value)
// regMaskTP overload: applies genMaxOneBit to whichever half of 'value' is non-empty.
// When more than 64 registers are supported and both halves are non-empty, returns false.
inline bool genMaxOneBit(regMaskTP value)
{
#ifdef HAS_MORE_THAN_64_REGISTERS
    if (value.getLow() == RBM_NONE)
    {
        // Only the high half can contain bits.
        return genMaxOneBit(value.getHigh());
    }
    if (value.getHigh() == RBM_NONE)
    {
        // Only the low half contains bits.
        return genMaxOneBit(value.getLow());
    }
    // Both halves are non-empty.
    return false;
#else
    return genMaxOneBit(value.getLow());
#endif
}

/*****************************************************************************
Expand Down
32 changes: 16 additions & 16 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -804,22 +804,6 @@ static bool isValidUimm_MultipleOf(ssize_t value)
return isValidUimm<bits>(value / mod) && (value % mod == 0);
}

// Returns true if 'value' is a legal signed immediate with 'bits' number of bits,
// i.e. it lies in the range [-2^(bits-1), 2^(bits-1) - 1].
template <const size_t bits>
static bool isValidSimm(ssize_t value)
{
    static_assert((bits > 0) && (bits < (sizeof(ssize_t) * 8)), "bits must be in [1, bit width of ssize_t)");

    // Shift an ssize_t-wide one: shifting the 'int' literal 1 overflows once bits exceeds 31.
    constexpr ssize_t max = static_cast<ssize_t>(1) << (bits - 1);
    return (-max <= value) && (value < max);
}

// Returns true if 'value' is an exact multiple of 'mod' and the quotient 'value / mod'
// is a legal signed immediate with 'bits' number of bits.
template <const size_t bits, const ssize_t mod>
static bool isValidSimm_MultipleOf(ssize_t value)
{
    static_assert(mod != 0);

    const ssize_t quotient  = value / mod;
    const ssize_t remainder = value % mod;

    if (!isValidSimm<bits>(quotient))
    {
        return false;
    }
    return remainder == 0;
}

// Returns true if 'imm' is a valid broadcast immediate for some SVE DUP variants
static bool isValidBroadcastImm(ssize_t imm, emitAttr laneSize)
{
Expand Down Expand Up @@ -1085,6 +1069,22 @@ static bool canEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL, em
// true if 'immDbl' can be encoded using a 'float immediate', also returns the encoding if wbFPI is non-null
static bool canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI = nullptr);

// Returns true if 'value' is a legal signed immediate with 'bits' number of bits,
// i.e. it lies in the range [-2^(bits-1), 2^(bits-1) - 1].
template <const size_t bits>
static bool isValidSimm(ssize_t value)
{
    static_assert((bits > 0) && (bits < (sizeof(ssize_t) * 8)), "bits must be in [1, bit width of ssize_t)");

    // Shift an ssize_t-wide one: shifting the 'int' literal 1 overflows once bits exceeds 31.
    constexpr ssize_t max = static_cast<ssize_t>(1) << (bits - 1);
    return (-max <= value) && (value < max);
}

// Returns true if 'value' is an exact multiple of 'mod' and the quotient 'value / mod'
// is a legal signed immediate with 'bits' number of bits.
template <const size_t bits, const ssize_t mod>
static bool isValidSimm_MultipleOf(ssize_t value)
{
    static_assert(mod != 0);

    const ssize_t quotient  = value / mod;
    const ssize_t remainder = value % mod;

    if (!isValidSimm<bits>(quotient))
    {
        return false;
    }
    return remainder == 0;
}

// Returns the number of bits used by the given 'size'.
inline static unsigned getBitWidth(emitAttr size)
{
Expand Down
Loading
Loading