Skip to content

Commit

Permalink
Handle more than 64 registers - The finale (#103387)
Browse files Browse the repository at this point in the history
* Add predicate registers

* Increase REGNUM_BITS to 7

* Assign float registers if node is mask

* Remove some TODO-SVE comments that were waiting on the predicate register implementation

* Make sure to use vector registers if there is no mask

* Handle some more printing of predicate registers

* jit format

* try to fix gcc failure

* Revert "try to fix gcc failure"

This reverts commit 5452f6a.

* proper gcc-14 build error fix
  • Loading branch information
kunalspathak committed Jun 14, 2024
1 parent 0d60428 commit b4a1fa2
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 65 deletions.
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6254,7 +6254,7 @@ void CodeGen::genArm64EmitterUnitTestsSve()

// IF_SVE_CW_4A
theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, REG_V30, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV); // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T>
INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V29, REG_P15, REG_V28, REG_V4, INS_OPTS_SCALABLE_D,
INS_SCALABLE_OPTS_UNPREDICATED); // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V5, REG_P13, REG_V27, REG_V5, INS_OPTS_SCALABLE_S,
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ class emitter
// x86: 38 bits
// amd64: 38 bits
// arm: 32 bits
// arm64: 44 bits
// arm64: 46 bits
// loongarch64: 28 bits
// risc-v: 28 bits

Expand Down Expand Up @@ -828,7 +828,7 @@ class emitter
// x86: 48 bits
// amd64: 48 bits
// arm: 48 bits
// arm64: 53 bits
// arm64: 55 bits
// loongarch64: 46 bits
// risc-v: 46 bits

Expand All @@ -840,7 +840,7 @@ class emitter
#if defined(TARGET_ARM)
#define ID_EXTRA_BITFIELD_BITS (16)
#elif defined(TARGET_ARM64)
#define ID_EXTRA_BITFIELD_BITS (21)
#define ID_EXTRA_BITFIELD_BITS (23)
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
#define ID_EXTRA_BITFIELD_BITS (14)
#elif defined(TARGET_XARCH)
Expand Down Expand Up @@ -881,7 +881,7 @@ class emitter
// x86: 54/50 bits
// amd64: 55/50 bits
// arm: 54/50 bits
// arm64: 60/55 bits
// arm64: 62/57 bits
// loongarch64: 53/48 bits
// risc-v: 53/48 bits

Expand All @@ -897,7 +897,7 @@ class emitter
// x86: 10/14 bits
// amd64: 9/14 bits
// arm: 10/14 bits
// arm64: 4/9 bits
// arm64: 2/7 bits
// loongarch64: 11/16 bits
// risc-v: 11/16 bits

Expand Down
6 changes: 2 additions & 4 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4250,11 +4250,9 @@ void emitter::emitIns_Mov(

case INS_sve_mov:
{
// TODO-SVE: Remove check for insOptsNone() when predicate registers
// are present.
if (insOptsNone(opt) && isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
{
// assert(insOptsNone(opt));
assert(insOptsNone(opt));

opt = INS_OPTS_SCALABLE_B;
attr = EA_SCALABLE;
Expand Down
6 changes: 2 additions & 4 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3786,9 +3786,7 @@ void emitter::emitInsSve_R_R_R(instruction ins,
// MOV is an alias for CPY, and is always the preferred disassembly.
ins = INS_sve_mov;
}
// TODO-SVE: Change the below check to INS_SCALABLE_OPTS_PREDICATE_MERGE
// once predicate registers are present.
else if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV)
else if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
{
assert(isVectorRegister(reg1));
assert(isPredicateRegister(reg2));
Expand Down Expand Up @@ -5909,7 +5907,7 @@ void emitter::emitInsSve_R_R_R_R(instruction ins,
{
// mov is a preferred alias for sel
return emitInsSve_R_R_R(INS_sve_mov, attr, reg1, reg2, reg3, opt,
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV);
INS_SCALABLE_OPTS_PREDICATE_MERGE);
}

assert(insOptsScalableStandard(opt));
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/instr.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,6 @@ enum insScalableOpts : unsigned
INS_SCALABLE_OPTS_TO_PREDICATE, // Variants moving to a predicate from a vector (e.g. pmov)
INS_SCALABLE_OPTS_TO_VECTOR, // Variants moving to a vector from a predicate (e.g. pmov)
INS_SCALABLE_OPTS_BROADCAST, // Used to distinguish mov from cpy, where mov is an alias for both
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV, // Use to distinguish mov (predicated) from other variants
};

// Maps directly to the pattern used in SVE instructions such as cntb.
Expand Down
10 changes: 8 additions & 2 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11518,7 +11518,13 @@ void LinearScan::dumpRegRecordTitleIfNeeded()
if ((lastDumpedRegisters != registersToDump) || (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES))
{
lastUsedRegNumIndex = 0;
int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
int lastRegNumIndex = compiler->compFloatingPointUsed ?
#ifdef HAS_MORE_THAN_64_REGISTERS
REG_MASK_LAST
#else
REG_FP_LAST
#endif
: REG_INT_LAST;
for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
{
if (registersToDump.IsRegNumInMask((regNumber)regNumIndex))
Expand Down Expand Up @@ -12129,7 +12135,7 @@ void LinearScan::verifyFinalAllocation()

case RefTypeKill:
dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, currentBlock, NONE,
currentRefPosition.registerAssignment);
currentRefPosition.getKillRegisterAssignment());
break;

case RefTypeFixedReg:
Expand Down
43 changes: 25 additions & 18 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1588,30 +1588,37 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
}
else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id))
{
SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet();
if (intrin.id == NI_Sve_ConditionalSelect)
if (!varTypeIsMask(intrin.op1->TypeGet()) && !HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
{
// If this is conditional select, make sure to check the embedded
// operation to determine the predicate mask.
assert(intrinsicTree->GetOperandCount() == 3);
assert(!HWIntrinsicInfo::IsLowMaskedOperation(intrin.id));

if (intrin.op2->OperIs(GT_HWINTRINSIC))
srcCount += BuildOperandUses(intrin.op1);
}
else
{
SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet();
if (intrin.id == NI_Sve_ConditionalSelect)
{
GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic();
const HWIntrinsic intrinEmb(embOp2Node);
if (HWIntrinsicInfo::IsLowMaskedOperation(intrinEmb.id))
// If this is conditional select, make sure to check the embedded
// operation to determine the predicate mask.
assert(intrinsicTree->GetOperandCount() == 3);
assert(!HWIntrinsicInfo::IsLowMaskedOperation(intrin.id));

if (intrin.op2->OperIs(GT_HWINTRINSIC))
{
predMask = RBM_LOWMASK.GetPredicateRegSet();
GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic();
const HWIntrinsic intrinEmb(embOp2Node);
if (HWIntrinsicInfo::IsLowMaskedOperation(intrinEmb.id))
{
predMask = RBM_LOWMASK.GetPredicateRegSet();
}
}
}
}
else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id))
{
predMask = RBM_LOWMASK.GetPredicateRegSet();
}
else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id))
{
predMask = RBM_LOWMASK.GetPredicateRegSet();
}

srcCount += BuildOperandUses(intrin.op1, predMask);
srcCount += BuildOperandUses(intrin.op1, predMask);
}
}
else if (intrinsicTree->OperIsMemoryLoadOrStore())
{
Expand Down
41 changes: 22 additions & 19 deletions src/coreclr/jit/registerarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,28 +94,31 @@ REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29")
REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30")
REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31")

// TODO-SVE: Fix once we add predicate registers
REGALIAS(P0, V0)
REGALIAS(P1, V1)
REGALIAS(P2, V2)
REGALIAS(P3, V3)
REGALIAS(P4, V4)
REGALIAS(P5, V5)
REGALIAS(P6, V6)
REGALIAS(P7, V7)
REGALIAS(P8, V8)
REGALIAS(P9, V9)
REGALIAS(P10, V10)
REGALIAS(P11, V11)
REGALIAS(P12, V12)
REGALIAS(P13, V13)
REGALIAS(P14, V14)
REGALIAS(P15, V15)
#define PBASE 64
#define PMASK(x) (1ULL << x)

/*
REGDEF(name, rnum, mask, xname, wname) */
REGDEF(P0, 0+PBASE, PMASK(0), "p0" , "na")
REGDEF(P1, 1+PBASE, PMASK(1), "p1" , "na")
REGDEF(P2, 2+PBASE, PMASK(2), "p2" , "na")
REGDEF(P3, 3+PBASE, PMASK(3), "p3" , "na")
REGDEF(P4, 4+PBASE, PMASK(4), "p4" , "na")
REGDEF(P5, 5+PBASE, PMASK(5), "p5" , "na")
REGDEF(P6, 6+PBASE, PMASK(6), "p6" , "na")
REGDEF(P7, 7+PBASE, PMASK(7), "p7" , "na")
REGDEF(P8, 8+PBASE, PMASK(8), "p8" , "na")
REGDEF(P9, 9+PBASE, PMASK(9), "p9" , "na")
REGDEF(P10, 10+PBASE, PMASK(10), "p10", "na")
REGDEF(P11, 11+PBASE, PMASK(11), "p11", "na")
REGDEF(P12, 12+PBASE, PMASK(12), "p12", "na")
REGDEF(P13, 13+PBASE, PMASK(13), "p13", "na")
REGDEF(P14, 14+PBASE, PMASK(14), "p14", "na")
REGDEF(P15, 15+PBASE, PMASK(15), "p15", "na")


// The registers with values 64 (NBASE) and above are not real register numbers
#define NBASE 64
// The registers with values 80 (NBASE) and above are not real register numbers
#define NBASE 80

REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?")
// This must be last!
Expand Down
11 changes: 2 additions & 9 deletions src/coreclr/jit/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ typedef uint64_t regMaskSmall;
#endif

#ifdef TARGET_ARM64
// #define HAS_MORE_THAN_64_REGISTERS 1
#define HAS_MORE_THAN_64_REGISTERS 1
#endif // TARGET_ARM64

// TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit)
Expand Down Expand Up @@ -267,14 +267,7 @@ struct regMaskTP
static constexpr regMaskTP CreateFromRegNum(regNumber reg, regMaskSmall mask)
{
#ifdef HAS_MORE_THAN_64_REGISTERS
if (reg < 64)
{
return regMaskTP(mask, RBM_NONE);
}
else
{
return regMaskTP(RBM_NONE, mask);
}
return (reg < 64) ? regMaskTP(mask, RBM_NONE) : regMaskTP(RBM_NONE, mask);
#else
return regMaskTP(mask, RBM_NONE);
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/targetarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@

static_assert_no_msg(REG_PREDICATE_HIGH_LAST == REG_PREDICATE_LAST);

#define REGNUM_BITS 6 // number of bits in a REG_*
#define REGNUM_BITS 7 // number of bits in a REG_*
#define REGSIZE_BYTES 8 // number of bytes in one general purpose register
#define FP_REGSIZE_BYTES 16 // number of bytes in one FP/SIMD register
#define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register that are saved/restored, for callee-saved registers
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
{
regMaskTP regBit = genRegMask(regNum);

if ((regMask & regBit) != 0)
if ((regMask & regBit).IsNonEmpty())
{
// We have a register to display. It gets displayed now if:
// 1. This is the first register to display of a new range of registers (possibly because
Expand Down

0 comments on commit b4a1fa2

Please sign in to comment.