Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 16ca06c

Browse files
committed
[Arm64] Add emitIns_R_R_S_S
1 parent 494f00c commit 16ca06c

File tree

4 files changed

+104
-27
lines changed

4 files changed

+104
-27
lines changed

src/jit/codegenarm64.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2597,9 +2597,7 @@ void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* b
25972597
if (base->gtOper == GT_LCL_FLD_ADDR)
25982598
offset += base->gtLclFld.gtLclOffs;
25992599

2600-
// TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
2601-
emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst, base->gtLclVarCommon.gtLclNum, offset);
2602-
emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES);
2600+
emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset);
26032601
}
26042602
else
26052603
{

src/jit/codegenarmarch.cpp

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -686,28 +686,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
686686

687687
noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
688688

689-
// For a 16-byte structSize with GC pointers we will use two ldr and two str instructions
690-
// ldr x2, [x0]
691-
// ldr x3, [x0, #8]
692-
// str x2, [sp, #16]
693-
// str x3, [sp, #24]
694-
//
695-
// For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions
696-
// ldp x2, x3, [x0]
697-
// str x2, [sp, #16]
698-
// str x3, [sp, #24]
699-
//
700-
// For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions
701-
// ldp x2, x3, [x0]
702-
// str x2, [sp, #16]
703-
// str x3, [sp, #24]
689+
// For a structSize of 16 bytes or more we generate one ldp and one stp instruction per loop iteration
704690
// ldp x2, x3, [x0]
705-
// str x2, [sp, #32]
706-
// str x3, [sp, #40]
707-
//
708-
// Note that when loading from a varNode we currently can't use the ldp instruction
709-
// TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
710-
//
691+
// stp x2, x3, [sp, #16]
711692

712693
int remainingSize = structSize;
713694
unsigned structOffset = 0;
@@ -720,9 +701,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
720701

721702
if (varNode != nullptr)
722703
{
723-
// Load from our varNumImp source, currently we can't use a ldp instruction to do this
724-
emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
725-
emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE);
704+
// Load from our varNumInp source
705+
emit->emitIns_R_R_S_S(INS_ldp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumInp,
706+
0);
726707
}
727708
else
728709
{

src/jit/emitarm64.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6108,6 +6108,101 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
61086108
appendToCurIG(id);
61096109
}
61106110

6111+
/*****************************************************************************
6112+
*
6113+
* Add an instruction referencing two registers and consecutive stack-based local variable slots.
6114+
*/
6115+
void emitter::emitIns_R_R_S_S(
6116+
instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs)
6117+
{
6118+
assert((ins == INS_ldp) || (ins == INS_ldnp)); // only these two load-pair instructions are accepted
6119+
assert(EA_8BYTE == EA_SIZE(attr1)); // attr1 describes reg1; it must be 8 bytes wide
6120+
assert(EA_8BYTE == EA_SIZE(attr2)); // attr2 describes reg2; it must be 8 bytes wide
6121+
assert(isGeneralRegisterOrZR(reg1));
6122+
assert(isGeneralRegisterOrZR(reg2));
6123+
assert(offs >= 0); // offs is the offset within local variable varx
6124+
6125+
emitAttr size = EA_SIZE(attr1); // NOTE(review): 'size' is not referenced again in this function
6126+
insFormat fmt = IF_LS_3B; // default format; switched to IF_LS_3C below when a scaled immediate is used
6127+
int disp = 0;
6128+
const unsigned scale = 3; // log2 of the 8-byte access size asserted above
6129+
6130+
/* Figure out the variable's frame position */
6131+
int base;
6132+
bool FPbased;
6133+
6134+
base = emitComp->lvaFrameAddress(varx, &FPbased);
6135+
disp = base + offs; // displacement from the chosen base register
6136+
6137+
// TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
6138+
regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE; // base register: frame pointer or stack pointer
6139+
reg3 = encodingSPtoZR(reg3);
6140+
6141+
bool useRegForAdr = true; // assume the address must be computed into a register until proven otherwise
6142+
ssize_t imm = disp;
6143+
ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
6144+
if (imm == 0)
6145+
{
6146+
useRegForAdr = false; // zero displacement: keep IF_LS_3B and address directly off reg3
6147+
}
6148+
else
6149+
{
6150+
if ((imm & mask) == 0) // displacement is a multiple of 8, so it can be scaled
6151+
{
6152+
ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st
6153+
6154+
if ((immShift >= -64) && (immShift <= 63)) // scaled value must fit the signed range [-64, 63]
6155+
{
6156+
fmt = IF_LS_3C;
6157+
useRegForAdr = false;
6158+
imm = immShift; // the instruction descriptor stores the scaled immediate
6159+
}
6160+
}
6161+
}
6162+
6163+
if (useRegForAdr)
6164+
{
6165+
regNumber rsvd = codeGen->rsGetRsvdReg();
6166+
emitIns_R_R_Imm(INS_add, EA_8BYTE, rsvd, reg3, imm); // rsvd = base register + unscaled displacement
6167+
reg3 = rsvd; // the load pair now addresses through rsvd ...
6168+
imm = 0; // ... with no immediate offset (fmt stays IF_LS_3B)
6169+
}
6170+
6171+
assert(fmt != IF_NONE);
6172+
6173+
instrDesc* id = emitNewInstrCns(attr1, imm); // created with attr1; attr2's GC kind is recorded separately below
6174+
6175+
id->idIns(ins);
6176+
id->idInsFmt(fmt);
6177+
id->idInsOpt(INS_OPTS_NONE);
6178+
6179+
if (EA_IS_GCREF(attr2))
6180+
{
6181+
/* A special value indicates a GCref pointer value */
6182+
6183+
id->idGCrefReg2(GCT_GCREF);
6184+
}
6185+
else if (EA_IS_BYREF(attr2))
6186+
{
6187+
/* A special value indicates a Byref pointer value */
6188+
6189+
id->idGCrefReg2(GCT_BYREF);
6190+
}
6191+
6192+
id->idReg1(reg1); // first destination register
6193+
id->idReg2(reg2); // second destination register
6194+
id->idReg3(reg3); // address base register (FP/SP encoding, or the reserved register)
6195+
id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); // record the local variable and offset being referenced
6196+
id->idSetIsLclVar();
6197+
6198+
#ifdef DEBUG
6199+
id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6200+
#endif
6201+
6202+
dispIns(id);
6203+
appendToCurIG(id);
6204+
}
6205+
61116206
/*****************************************************************************
61126207
*
61136208
* Add an instruction referencing a stack-based local variable and a register

src/jit/emitarm64.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,9 @@ void emitIns_S_S_R_R(
763763

764764
void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
765765

766+
void emitIns_R_R_S_S(
767+
instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs);
768+
766769
void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val);
767770

768771
void emitIns_R_C(

0 commit comments

Comments
 (0)