From 3143b218946395834f0bfef741061ac6ef3f5b56 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 30 Mar 2017 11:17:15 +0200 Subject: [PATCH] ARM64: Add big-endian support. Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. Sponsored by Cisco Systems, Inc. --- Makefile | 4 +-- src/Makefile | 3 ++ src/host/buildvm_asm.c | 8 +++-- src/jit/bcsave.lua | 8 ++--- src/jit/dis_arm64be.lua | 12 +++++++ src/lj_arch.h | 10 +++--- src/lj_asm.c | 3 ++ src/lj_asm_arm64.h | 42 ++++++++++++++++--------- src/lj_ccall.c | 20 ++++++++++-- src/lj_ccall.h | 4 +-- src/lj_ccallback.c | 18 ++++++----- src/lj_emit_arm64.h | 2 +- src/lj_target_arm64.h | 9 +++++- src/vm_arm64.dasc | 69 +++++++++++++++++++++++++++-------------- 14 files changed, 149 insertions(+), 63 deletions(-) create mode 100644 src/jit/dis_arm64be.lua diff --git a/Makefile b/Makefile index 489d7e754c..e6472e0bc8 100644 --- a/Makefile +++ b/Makefile @@ -87,8 +87,8 @@ FILE_PC= luajit.pc FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ - dis_ppc.lua dis_mips.lua dis_mipsel.lua dis_mips64.lua \ - dis_mips64el.lua vmdef.lua + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ + dis_mips64.lua dis_mips64el.lua vmdef.lua ifeq (,$(findstring Windows,$(OS))) HOST_SYS:= $(shell uname -s) diff --git a/src/Makefile b/src/Makefile index 7cb4c14ad9..f56465d138 100644 --- a/src/Makefile +++ b/src/Makefile @@ -242,6 +242,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) TARGET_LJARCH= arm else ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 + endif TARGET_LJARCH= arm64 else ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index addf281f35..1a63360289 100644 --- a/src/host/buildvm_asm.c +++ 
b/src/host/buildvm_asm.c @@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) { int i; for (i = 0; i < n; i += 4) { + uint32_t ins = *(uint32_t *)(p+i); +#if LJ_TARGET_ARM64 && LJ_BE + ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */ +#endif if ((i & 15) == 0) - fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); + fprintf(ctx->fp, "\t.long 0x%08x", ins); else - fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); + fprintf(ctx->fp, ",0x%08x", ins); if ((i & 15) == 12) putc('\n', ctx->fp); } if ((n & 15) != 0) putc('\n', ctx->fp); diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 9ee22a013d..c17c88e0ff 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -63,8 +63,8 @@ local map_type = { } local map_arch = { - x86 = true, x64 = true, arm = true, arm64 = true, ppc = true, - mips = true, mipsel = true, + x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, + ppc = true, mips = true, mipsel = true, } local map_os = { @@ -200,7 +200,7 @@ typedef struct { ]] local symname = LJBC_PREFIX..ctx.modname local is64, isbe = false, false - if ctx.arch == "x64" or ctx.arch == "arm64" then + if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then is64 = true elseif ctx.arch == "ppc" or ctx.arch == "mips" then isbe = true @@ -237,7 +237,7 @@ typedef struct { hdr.eendian = isbe and 2 or 1 hdr.eversion = 1 hdr.type = f16(1) - hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) + hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) if ctx.arch == "mips" or ctx.arch == "mipsel" then hdr.flags = f32(0x50001006) end diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua new file mode 100644 index 0000000000..7eb389e2fa --- /dev/null +++ b/src/jit/dis_arm64be.lua @@ -0,0 +1,12 @@ +---------------------------------------------------------------------------- +-- LuaJIT ARM64BE disassembler 
wrapper module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- ARM64 instructions are always little-endian. So just forward to the +-- common ARM64 disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +return require((string.match(..., ".*%.") or "").."dis_arm64") + diff --git a/src/lj_arch.h b/src/lj_arch.h index 405096071c..fe55815736 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -215,9 +215,14 @@ #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 -#define LJ_ARCH_NAME "arm64" #define LJ_ARCH_BITS 64 +#if defined(__AARCH64EB__) +#define LJ_ARCH_NAME "arm64be" +#define LJ_ARCH_ENDIAN LUAJIT_BE +#else +#define LJ_ARCH_NAME "arm64" #define LJ_ARCH_ENDIAN LUAJIT_LE +#endif #define LJ_TARGET_ARM64 1 #define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ @@ -409,9 +414,6 @@ #error "Only ARM EABI or iOS 3.0+ ABI is supported" #endif #elif LJ_TARGET_ARM64 -#if defined(__AARCH64EB__) -#error "No support for big-endian ARM64" -#endif #if defined(_ILP32) #error "No support for ILP32 model on ARM64" #endif diff --git a/src/lj_asm.c b/src/lj_asm.c index 7c09dd9f50..c2cf5a95a5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2393,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) if (!as->loopref) asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! 
*/ T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); +#if LJ_TARGET_MCODE_FIXUP + asm_mcode_fixup(T->mcode, T->szmcode); +#endif lj_mcode_sync(T->mcode, origtop); } diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index b58ab3a183..8fd92e76fd 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) asm_mclimit(as); /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_BL|((-3-i)&0x03ffffffu); - *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno); + *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu)); + *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno)); mxp--; - *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); - *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP); + *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu)); + *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP)); as->mctop = mxp; } @@ -431,7 +431,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) fpr++; } else { Reg r = ra_alloc1(as, ref, RSET_FPR); - emit_spstore(as, ir, r, ofs); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); ofs += 8; } } else { @@ -441,7 +441,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) gpr++; } else { Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 
4 : 0)); ofs += 8; } } @@ -1082,7 +1082,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) src = ra_alloc1(as, ir->op2, allow); rset_clear(allow, src); if (irt_isinteger(ir->t)) - type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow); + type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow); else type = ra_allock(as, irt_toitype(ir->t), allow); } else { @@ -1179,7 +1179,8 @@ static void asm_sload(ASMState *as, IRIns *ir) } if (ra_hasreg(dest)) { emit_lso(as, irt_isnum(t) ? A64I_LDRd : - (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs); + (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, + ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0))); } } @@ -1909,7 +1910,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); if (spadj == 0) { - *--p = A64I_NOP; + *--p = A64I_LE(A64I_NOP); as->mctop = p; } else { /* Patch stack adjustment. */ @@ -1962,6 +1963,19 @@ static void asm_setup_target(ASMState *as) asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); } +#if LJ_BE +/* ARM64 instructions are always little-endian. Swap for ARM64BE. */ +static void asm_mcode_fixup(MCode *mcode, MSize size) +{ + MCode *pe = (MCode *)((char *)mcode + size); + while (mcode < pe) { + MCode ins = *mcode; + *mcode++ = lj_bswap(ins); + } +} +#define LJ_TARGET_MCODE_FIXUP 1 +#endif + /* -- Trace patching ------------------------------------------------------ */ /* Patch exit jumps of existing machine code to a new target. */ @@ -1974,29 +1988,29 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MCode *px = exitstub_trace_addr(T, exitno); for (; p < pe; p++) { /* Look for exitstub branch, replace with branch to target. */ - uint32_t ins = *p; + MCode ins = A64I_LE(*p); if ((ins & 0xff000000u) == 0x54000000u && ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { /* Patch bcc exitstub. 
*/ - *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); + *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u)); cend = p+1; if (!cstart) cstart = p; } else if ((ins & 0xfc000000u) == 0x14000000u && ((ins ^ (px-p)) & 0x03ffffffu) == 0) { /* Patch b exitstub. */ - *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); + *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu)); cend = p+1; if (!cstart) cstart = p; } else if ((ins & 0x7e000000u) == 0x34000000u && ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { /* Patch cbz/cbnz exitstub. */ - *p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u); + *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u)); cend = p+1; if (!cstart) cstart = p; } else if ((ins & 0x7e000000u) == 0x36000000u && ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { /* Patch tbz/tbnz exitstub. */ - *p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u); + *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u)); cend = p+1; if (!cstart) cstart = p; } diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 2b7ca36456..5c252e5b68 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -301,7 +301,7 @@ unsigned int cl = ccall_classify_struct(cts, ctr); \ if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ CTSize i = (cl >> 8) - 1; \ - do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ + do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \ } else { \ if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ memcpy(dp, sp, ctr->size); \ @@ -359,6 +359,13 @@ } \ } +#if LJ_BE +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; +#endif + + #elif LJ_TARGET_PPC /* -- PPC calling conventions --------------------------------------------- */ @@ -1033,9 +1040,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int32_t *)dp = d->size == 1 ? 
(int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_ARM64 && LJ_BE + if (isfp && d->size == sizeof(float)) + ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ +#endif +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 - if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || - (isfp && nsp == 0)) && d->size <= 4) { + || (isfp && nsp == 0) +#endif + ) && d->size <= 4) { *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ } #endif diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 34e800cc03..59f664817a 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -79,8 +79,8 @@ typedef union FPRArg { typedef intptr_t GPRArg; typedef union FPRArg { double d; - float f; - uint32_t u32; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; + struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; } FPRArg; #elif LJ_TARGET_PPC diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index fce6a3ed46..846827b119 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -173,16 +173,16 @@ static void callback_mcode_init(global_State *g, uint32_t *page) uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; MSize slot; - *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); - *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); - *p++ = A64I_BR | A64F_N(RID_X11); - *p++ = A64I_NOP; + *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); + *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); + *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); + *p++ = A64I_LE(A64I_NOP); ((void **)p)[0] = target; ((void **)p)[1] = g; p += 4; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); - *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); + *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); + *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); p++; } lua_assert(p - page <= 
CALLBACK_MCODE_SIZE); @@ -623,6 +623,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) #if CCALL_NUM_FPR if (ctype_isfp(ctr->info)) dp = (uint8_t *)&cts->cb.fpr[0]; +#endif +#if LJ_TARGET_ARM64 && LJ_BE + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) + dp = (uint8_t *)&cts->cb.fpr[0].f[1]; #endif lj_cconv_ct_tv(cts, ctr, dp, o, 0); #ifdef CALLBACK_HANDLE_RET @@ -637,7 +641,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } -#if LJ_TARGET_MIPS64 +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ if (ctr->size <= 4 && (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index cfa18c83c2..6da4c7d4b4 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -140,7 +140,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) } else { goto nopair; } - if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) { + if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); return; diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 3f6bb39be2..520023ae21 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -107,7 +107,7 @@ typedef struct { /* Return the address of a per-trace exit stub. */ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { - while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */ + while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. 
*/ @@ -116,6 +116,13 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) /* -- Instructions -------------------------------------------------------- */ +/* ARM64 instructions are always little-endian. Swap for ARM64BE. */ +#if LJ_BE +#define A64I_LE(x) (lj_bswap(x)) +#else +#define A64I_LE(x) (x) +#endif + /* Instruction fields. */ #define A64F_D(r) (r) #define A64F_N(r) ((r) << 5) diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index bb2496ab18..cfbc61a1cf 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -151,6 +151,21 @@ |.define FRAME_FUNC, #-16 |.define FRAME_PC, #-8 | +|// Endian-specific defines. +|.if ENDIAN_LE +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RB, 3 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RB, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif +| |.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro |.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro |.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro @@ -717,7 +732,7 @@ static void build_subroutines(BuildCtx *ctx) | cmp CRET1, #1 | bhi ->vmeta_binop |4: - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | add PC, PC, #4 | add RB, PC, RB, lsl #2 | sub RB, RB, #0x20000 @@ -1500,7 +1515,12 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback | checkint CARG1, ->fff_fallback | mov CARG3, #1 - | mov CARG2, BASE // Points to stack. Little-endian. + | // Point to the char inside the integer in the stack slot. + |.if ENDIAN_LE + | mov CARG2, BASE + |.else + | add CARG2, BASE, #7 + |.endif |->fff_newstr: | // CARG2 = str, CARG3 = len. 
| str BASE, L->base @@ -1703,7 +1723,7 @@ static void build_subroutines(BuildCtx *ctx) | ands TMP0, PC, #FRAME_TYPE | and TMP1, PC, #~FRAME_TYPEP | bne >3 - | ldrb RAw, [PC, #-3] + | ldrb RAw, [PC, #-4+OFS_RA] | lsl RA, RA, #3 | add TMP1, RA, #16 |3: @@ -1838,7 +1858,7 @@ static void build_subroutines(BuildCtx *ctx) |->cont_stitch: // Trace stitching. |.if JIT | // RA = resultptr, CARG4 = meta base - | ldr RB, SAVE_MULTRES + | ldr RBw, SAVE_MULTRES | ldr INSw, [PC, #-4] | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. | subs RB, RB, #8 @@ -1869,7 +1889,7 @@ static void build_subroutines(BuildCtx *ctx) | | // Stitch a new trace to the previous trace. | mov CARG1, #GL_J(exitno) - | str RA, [GL, CARG1] + | str RAw, [GL, CARG1] | mov CARG1, #GL_J(L) | str L, [GL, CARG1] | str BASE, L->base @@ -1936,6 +1956,9 @@ static void build_subroutines(BuildCtx *ctx) | sub CARG1, CARG1, #2 | ldr CARG2w, [lr] // Load trace number. | st_vmstate CARG4 + |.if ENDIAN_BE + | rev32 CARG2, CARG2 + |.endif | str BASE, L->base | ubfx CARG2w, CARG2w, #5, #16 | str CARG1w, [GL, #GL_J(exitno)] @@ -1967,14 +1990,14 @@ static void build_subroutines(BuildCtx *ctx) | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | movn TISNIL, #0 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | str RC, SAVE_MULTRES + | str RCw, SAVE_MULTRES | str BASE, L->base | ldr CARG2, LFUNC:CARG2->pc | str xzr, GL->jit_base | mv_vmstate CARG4, INTERP | ldr KBASE, [CARG2, #PC2PROTO(k)] | // Modified copy of ins_next which handles function header dispatch, too. - | ldrb RBw, [PC] + | ldrb RBw, [PC, # OFS_OP] | ldr INSw, [PC], #4 | st_vmstate CARG4 | cmp RBw, #BC_FUNCC+2 // Fast function? @@ -2000,7 +2023,7 @@ static void build_subroutines(BuildCtx *ctx) | ands CARG2, CARG1, #FRAME_TYPE | bne <2 // Trace stitching continuation? | // Otherwise set KBASE for Lua function below fast function. 
- | ldr CARG3, [CARG1, #-4] + | ldr CARG3w, [CARG1, #-4] | decode_RA CARG1, CARG3 | sub CARG2, BASE, CARG1, lsl #3 | ldr LFUNC:CARG3, [CARG2, #-32] @@ -2153,7 +2176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1, RC = src2, JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG2, [BASE, RC, lsl #3] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2210,7 +2233,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src1, RC = src2, JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | add RC, BASE, RC, lsl #3 - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG3, [RC] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2271,7 +2294,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src, RC = str_const (~), JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | mvn RC, RC - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG2, [KBASE, RC, lsl #3] | add PC, PC, #4 | movn TMP0, #~LJ_TSTR @@ -2299,7 +2322,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src, RC = num_const (~), JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | add RC, KBASE, RC, lsl #3 - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG3, [RC] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2359,7 +2382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQP; | // RA = src, RC = primitive_type (~), JMP with RC = target | ldr TMP0, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | add PC, PC, #4 | add RC, RC, #1 | add RB, PC, RB, lsl #2 @@ -2384,7 +2407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | // RA = dst or unused, RC = src, JMP with RC = target - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr TMP0, [BASE, RC, lsl #3] | add PC, PC, #4 | 
mov_false TMP1 @@ -2631,7 +2654,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | str PC, SAVE_PC | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) | // Returns NULL (finished) or TValue * (metamethod). - | ldrb RBw, [PC, #-1] + | ldrb RBw, [PC, #-4+OFS_RB] | ldr BASE, L->base | cbnz CRET1, ->vmeta_binop | ldr TMP0, [BASE, RB, lsl #3] @@ -3262,7 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_callt | |5: // Tailcall to a fast function with a Lua frame below. - | ldrb RAw, [PC, #-3] + | ldrb RAw, [PC, #-4+OFS_RA] | sub CARG1, BASE, RA, lsl #3 | ldr LFUNC:CARG1, [CARG1, #-32] | and LFUNC:CARG1, CARG1, #LJ_GCVMASK @@ -3303,8 +3326,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | add RA, BASE, RA, lsl #3 | ldr TAB:RB, [RA, #-16] - | ldrh TMP3w, [PC, #2] - | ldr CARG1w, [RA, #-8] // Get index from control var. + | ldrh TMP3w, [PC, # OFS_RD] + | ldr CARG1w, [RA, #-8+LO] // Get index from control var. | add PC, PC, #4 | add TMP3, PC, TMP3, lsl #2 | and TAB:RB, RB, #LJ_GCVMASK @@ -3323,7 +3346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stp CARG1, TMP0, [RA] | add CARG1, CARG1, #1 |3: - | str CARG1w, [RA, #-8] // Update control var. + | str CARG1w, [RA, #-8+LO] // Update control var. | mov PC, TMP3 |4: | ins_next @@ -3369,8 +3392,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Despecialize bytecode if any of the checks fail. 
| mov TMP0, #BC_JMP | mov TMP1, #BC_ITERC - | strb TMP0w, [PC, #-4] - | strb TMP1w, [RC] + | strb TMP0w, [PC, #-4+OFS_OP] + | strb TMP1w, [RC, # OFS_OP] | b <1 break; @@ -3576,7 +3599,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | csel PC, RC, PC, gt } else if (op == BC_JFORI) { | mov PC, RC - | ldrh RCw, [RC, #-2] + | ldrh RCw, [RC, #-4+OFS_RD] } else if (op == BC_IFORL) { | csel PC, RC, PC, le } @@ -3617,7 +3640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (op == BC_FORI) { | csel PC, RC, PC, hi } else if (op == BC_JFORI) { - | ldrh RCw, [RC, #-2] + | ldrh RCw, [RC, #-4+OFS_RD] | bls =>BC_JLOOP } else if (op == BC_IFORL) { | csel PC, RC, PC, ls