From e32e978260e832020759175f0c48251a936cb684 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 20 Jul 2024 16:18:21 +0200 Subject: [PATCH 1/4] JIT: Add an emitter peephole for for post-indexed addressing This transforms sequences like ```asm ldr x0, [x1] add x1, x1, #8 ``` into the equivalent ```asm ldr x0, [x1], #8 ``` The second half of this change will be having lowering and strength reduction set up the IR such that this transformation kicks in. --- src/coreclr/jit/emitarm64.cpp | 163 ++++++++++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 2 + 2 files changed, 165 insertions(+) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f831e0aa1b691..709db12835071 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -5844,6 +5844,12 @@ void emitter::emitIns_R_R_I(instruction ins, return; } + if ((reg1 == reg2) && (EA_SIZE(attr) == EA_PTRSIZE) && emitComp->opts.OptimizationEnabled() && + OptimizePostIndexed(ins, reg1, imm, attr)) + { + return; + } + reg1 = encodingSPtoZR(reg1); reg2 = encodingSPtoZR(reg2); } @@ -11070,6 +11076,40 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code |= ((code_t)imm << 12); // iiiiiiiii code |= insEncodeReg_Rn(id->idReg2()); // nnnnn dst += emitOutput_Instr(dst, code); + + // With pre or post-indexing we may have a second GC register to + // update. + if (insOptsIndexed(id->idInsOpt()) && !id->idIsSmallDsc()) + { + // Handle GC update here: we may have up to two updated GC + // registers by this instruction. + if (emitInsIsLoad(ins)) + { + // Load will write the destination (reg1). + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); + } + else + { + emitGCregDeadUpd(id->idReg1(), dst); + } + } + + // We will always write reg2. Note that it is stored encoded, + // so we have to decode it. + if (id->idGCrefReg2() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); + } + else + { + emitGCregDeadUpd(id->idReg2(), dst); + } + + goto SKIP_GC_UPDATE; + } + break; case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn @@ -17150,6 +17190,129 @@ bool emitter::IsOptimizableLdrToMov( return true; } +//----------------------------------------------------------------------------------- +// OptimizePostIndexed: Optimize an addition/subtraction from a register by +// replacing the previous instruction with a post-indexed addressing form if +// possible. +// +// Arguments: +// ins - Whether this is an add or subtraction +// reg - The register that is being updated +// imm - Immediate that is being added/subtracted +// +// Returns: +// True if the previous instruction was optimized to perform the add/sub. +// +bool emitter::OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, emitAttr regAttr) +{ + assert((ins == INS_add) || (ins == INS_sub)); + + if (!emitCanPeepholeLastIns() || !emitInsIsLoadOrStore(emitLastIns->idIns())) + { + return false; + } + + if ((emitLastIns->idInsFmt() != IF_LS_2A) || emitLastIns->idIsTlsGD()) + { + return false; + } + + // Cannot allow post indexing if the load itself is already modifying the + // register. + regNumber loadStoreDataReg = emitLastIns->idReg1(); + if (loadStoreDataReg == reg) + { + // TODO-CQ: Why isn't this ok for stores? They do not modify the + // register. + return false; + } + + // We must be updating the same register that the addressing is happening + // on. The SP register is stored as ZR, so make sure to normalize that too. + regNumber loadStoreAddrReg = encodingZRtoSP(emitLastIns->idReg2()); + if (loadStoreAddrReg != reg) + { + return false; + } + + // Only some stores/loads are eligible + switch (emitLastIns->idIns()) + { + case INS_ldrb: + case INS_strb: + case INS_ldurb: + case INS_sturb: + case INS_ldrh: + case INS_strh: + case INS_ldurh: + case INS_sturh: + case INS_ldrsb: + case INS_ldursb: + case INS_ldrsh: + case INS_ldursh: + case INS_ldrsw: + case INS_ldursw: + case INS_ldr: + case INS_str: + case INS_ldur: + case INS_stur: + break; + + default: + return false; + } + + if (ins == INS_sub) + { + imm = -imm; + } + + // Only some post-indexing offsets can be represented. + if ((imm < -256) || (imm >= 255)) + { + return false; + } + + instruction newIns = emitLastIns->idIns(); + emitAttr newAttr; + + switch (emitLastIns->idGCref()) + { + case GCT_BYREF: + newAttr = EA_BYREF; + break; + case GCT_GCREF: + newAttr = EA_GCREF; + break; + default: + newAttr = emitLastIns->idOpSize(); + break; + } + + emitRemoveLastInstruction(); + + instrDesc* id = emitNewInstrCns(newAttr, imm); + id->idIns(newIns); + id->idInsFmt(IF_LS_2C); + id->idInsOpt(INS_OPTS_POST_INDEX); + + id->idReg1(loadStoreDataReg); + id->idReg2(encodingSPtoZR(loadStoreAddrReg)); + + if (EA_IS_BYREF(regAttr)) + { + id->idGCrefReg2(GCT_BYREF); + } + else if (EA_IS_GCREF(regAttr)) + { + id->idGCrefReg2(GCT_GCREF); + } + + dispIns(id); + appendToCurIG(id); + return true; +} + #if defined(FEATURE_SIMD) //----------------------------------------------------------------------------------- // emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset. diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 1aeeb46942e5f..8e2ed80c6cdf2 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -193,6 +193,8 @@ FORCEINLINE bool OptimizeLdrStr(instruction ins, int varx = -1, int offs = -1 DEBUG_ARG(bool useRsvdReg = false)); +bool OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, emitAttr regAttr); + emitLclVarAddr* emitGetLclVarPairLclVar2(instrDesc* id) { assert(id->idIsLclVarPair()); From 75276fc15a2c42150bdcc518f6c9052bf7a06a4a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 20 Jul 2024 16:29:14 +0200 Subject: [PATCH 2/4] Nit --- src/coreclr/jit/emitarm64.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 709db12835071..0e82e130052ee 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11081,8 +11081,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // update. if (insOptsIndexed(id->idInsOpt()) && !id->idIsSmallDsc()) { - // Handle GC update here: we may have up to two updated GC - // registers by this instruction. if (emitInsIsLoad(ins)) { // Load will write the destination (reg1). @@ -11096,8 +11094,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } } - // We will always write reg2. Note that it is stored encoded, - // so we have to decode it. + // We will always write reg2. if (id->idGCrefReg2() != GCT_NONE) { emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); From 3efbdb15c9478c0aec6f06fc4992701091087c82 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 20 Jul 2024 22:31:40 +0200 Subject: [PATCH 3/4] Fix off by one --- src/coreclr/jit/emitarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 0e82e130052ee..e9e19aeaecad4 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17265,7 +17265,7 @@ bool emitter::OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, e } // Only some post-indexing offsets can be represented. - if ((imm < -256) || (imm >= 255)) + if ((imm < -256) || (imm >= 256)) { return false; } From e8eba1752bc4b6fb8cdf3a1a71b33ecfc0f7de3d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 20 Jul 2024 22:32:51 +0200 Subject: [PATCH 4/4] Remove wrong comment --- src/coreclr/jit/emitarm64.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index e9e19aeaecad4..422d539d3b8c0 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17219,8 +17219,6 @@ bool emitter::OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, e regNumber loadStoreDataReg = emitLastIns->idReg1(); if (loadStoreDataReg == reg) { - // TODO-CQ: Why isn't this ok for stores? They do not modify the - // register. return false; }