Skip to content

Commit

Permalink
ARM64: Add big-endian support.
Browse files Browse the repository at this point in the history
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
  • Loading branch information
Mike Pall committed Mar 30, 2017
1 parent 78f5f1c commit 3143b21
Show file tree
Hide file tree
Showing 14 changed files with 149 additions and 63 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ FILE_PC= luajit.pc
FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
dis_ppc.lua dis_mips.lua dis_mipsel.lua dis_mips64.lua \
dis_mips64el.lua vmdef.lua
dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
dis_mips64.lua dis_mips64el.lua vmdef.lua

ifeq (,$(findstring Windows,$(OS)))
HOST_SYS:= $(shell uname -s)
Expand Down
3 changes: 3 additions & 0 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm
else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__AARCH64EB__=1
endif
TARGET_LJARCH= arm64
else
ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
Expand Down
8 changes: 6 additions & 2 deletions src/host/buildvm_asm.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
{
int i;
for (i = 0; i < n; i += 4) {
uint32_t ins = *(uint32_t *)(p+i);
#if LJ_TARGET_ARM64 && LJ_BE
ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
#endif
if ((i & 15) == 0)
fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i));
fprintf(ctx->fp, "\t.long 0x%08x", ins);
else
fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i));
fprintf(ctx->fp, ",0x%08x", ins);
if ((i & 15) == 12) putc('\n', ctx->fp);
}
if ((n & 15) != 0) putc('\n', ctx->fp);
Expand Down
8 changes: 4 additions & 4 deletions src/jit/bcsave.lua
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ local map_type = {
}

local map_arch = {
x86 = true, x64 = true, arm = true, arm64 = true, ppc = true,
mips = true, mipsel = true,
x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true,
ppc = true, mips = true, mipsel = true,
}

local map_os = {
Expand Down Expand Up @@ -200,7 +200,7 @@ typedef struct {
]]
local symname = LJBC_PREFIX..ctx.modname
local is64, isbe = false, false
if ctx.arch == "x64" or ctx.arch == "arm64" then
if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then
is64 = true
elseif ctx.arch == "ppc" or ctx.arch == "mips" then
isbe = true
Expand Down Expand Up @@ -237,7 +237,7 @@ typedef struct {
hdr.eendian = isbe and 2 or 1
hdr.eversion = 1
hdr.type = f16(1)
hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
if ctx.arch == "mips" or ctx.arch == "mipsel" then
hdr.flags = f32(0x50001006)
end
Expand Down
12 changes: 12 additions & 0 deletions src/jit/dis_arm64be.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
----------------------------------------------------------------------------
-- LuaJIT ARM64BE disassembler wrapper module.
--
-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- Instruction words on ARM64 are little-endian even on a big-endian
-- target, so this wrapper adds nothing of its own: it loads the common
-- ARM64 disassembler module and returns that module unchanged.
----------------------------------------------------------------------------

-- Package prefix of this module's own name: everything up to and including
-- the last dot, or the empty string when the name contains no dot.
local prefix = string.match(..., ".*%.") or ""

return require(prefix.."dis_arm64")

10 changes: 6 additions & 4 deletions src/lj_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,14 @@

#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64

#define LJ_ARCH_NAME "arm64"
#define LJ_ARCH_BITS 64
#if defined(__AARCH64EB__)
#define LJ_ARCH_NAME "arm64be"
#define LJ_ARCH_ENDIAN LUAJIT_BE
#else
#define LJ_ARCH_NAME "arm64"
#define LJ_ARCH_ENDIAN LUAJIT_LE
#endif
#define LJ_TARGET_ARM64 1
#define LJ_TARGET_EHRETREG 0
#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
Expand Down Expand Up @@ -409,9 +414,6 @@
#error "Only ARM EABI or iOS 3.0+ ABI is supported"
#endif
#elif LJ_TARGET_ARM64
#if defined(__AARCH64EB__)
#error "No support for big-endian ARM64"
#endif
#if defined(_ILP32)
#error "No support for ILP32 model on ARM64"
#endif
Expand Down
3 changes: 3 additions & 0 deletions src/lj_asm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2393,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
if (!as->loopref)
asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
#if LJ_TARGET_MCODE_FIXUP
asm_mcode_fixup(T->mcode, T->szmcode);
#endif
lj_mcode_sync(T->mcode, origtop);
}

Expand Down
42 changes: 28 additions & 14 deletions src/lj_asm_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
asm_mclimit(as);
/* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
for (i = nexits-1; (int32_t)i >= 0; i--)
*--mxp = A64I_BL|((-3-i)&0x03ffffffu);
*--mxp = A64I_MOVZw|A64F_U16(as->T->traceno);
*--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
*--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
mxp--;
*mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu);
*--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP);
*mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
*--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
as->mctop = mxp;
}

Expand Down Expand Up @@ -431,7 +431,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
fpr++;
} else {
Reg r = ra_alloc1(as, ref, RSET_FPR);
emit_spstore(as, ir, r, ofs);
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
ofs += 8;
}
} else {
Expand All @@ -441,7 +441,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
gpr++;
} else {
Reg r = ra_alloc1(as, ref, RSET_GPR);
emit_spstore(as, ir, r, ofs);
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
ofs += 8;
}
}
Expand Down Expand Up @@ -1082,7 +1082,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
src = ra_alloc1(as, ir->op2, allow);
rset_clear(allow, src);
if (irt_isinteger(ir->t))
type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow);
type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
else
type = ra_allock(as, irt_toitype(ir->t), allow);
} else {
Expand Down Expand Up @@ -1179,7 +1179,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
}
if (ra_hasreg(dest)) {
emit_lso(as, irt_isnum(t) ? A64I_LDRd :
(irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs);
(irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
}
}

Expand Down Expand Up @@ -1909,7 +1910,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
/* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
if (spadj == 0) {
*--p = A64I_NOP;
*--p = A64I_LE(A64I_NOP);
as->mctop = p;
} else {
/* Patch stack adjustment. */
Expand Down Expand Up @@ -1962,6 +1963,19 @@ static void asm_setup_target(ASMState *as)
asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}

#if LJ_BE
/* Byte-swap every instruction word in [mcode, mcode+size) in place.
** ARM64 instructions are always little-endian, so a big-endian build has
** to swap the generated words. size is a byte count.
*/
static void asm_mcode_fixup(MCode *mcode, MSize size)
{
  MCode *p, *end = (MCode *)((char *)mcode + size);
  for (p = mcode; p < end; p++)
    *p = lj_bswap(*p);
}
/* Tells the caller that a fixup pass exists for this target. */
#define LJ_TARGET_MCODE_FIXUP 1
#endif

/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
Expand All @@ -1974,29 +1988,29 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
MCode *px = exitstub_trace_addr(T, exitno);
for (; p < pe; p++) {
/* Look for exitstub branch, replace with branch to target. */
uint32_t ins = *p;
MCode ins = A64I_LE(*p);
if ((ins & 0xff000000u) == 0x54000000u &&
((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
/* Patch bcc exitstub. */
*p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u);
*p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
cend = p+1;
if (!cstart) cstart = p;
} else if ((ins & 0xfc000000u) == 0x14000000u &&
((ins ^ (px-p)) & 0x03ffffffu) == 0) {
/* Patch b exitstub. */
*p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu);
*p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
cend = p+1;
if (!cstart) cstart = p;
} else if ((ins & 0x7e000000u) == 0x34000000u &&
((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
/* Patch cbz/cbnz exitstub. */
*p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u);
*p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u));
cend = p+1;
if (!cstart) cstart = p;
} else if ((ins & 0x7e000000u) == 0x36000000u &&
((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
/* Patch tbz/tbnz exitstub. */
*p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u);
*p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u));
cend = p+1;
if (!cstart) cstart = p;
}
Expand Down
20 changes: 17 additions & 3 deletions src/lj_ccall.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@
unsigned int cl = ccall_classify_struct(cts, ctr); \
if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
CTSize i = (cl >> 8) - 1; \
do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \
do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
} else { \
if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
memcpy(dp, sp, ctr->size); \
Expand Down Expand Up @@ -359,6 +359,13 @@
} \
}

#if LJ_BE
/* Return-value hook, defined only for big-endian builds: when the C return
** type is a floating-point type with the size of a float, redirect sp to
** the `f` member of the first FPR slot. Expects ctr, cc and sp to be in
** scope at the point of expansion.
** NOTE(review): presumably needed because on big-endian the float does not
** sit at the start of the 8-byte FPR slot -- confirm against FPRArg.
*/
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
#endif


#elif LJ_TARGET_PPC
/* -- PPC calling conventions --------------------------------------------- */

Expand Down Expand Up @@ -1033,9 +1040,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
#if LJ_TARGET_ARM64 && LJ_BE
if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
(isfp && nsp == 0)) && d->size <= 4) {
|| (isfp && nsp == 0)
#endif
) && d->size <= 4) {
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
}
#endif
Expand Down
4 changes: 2 additions & 2 deletions src/lj_ccall.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ typedef union FPRArg {
typedef intptr_t GPRArg;
typedef union FPRArg {
double d;
float f;
uint32_t u32;
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
} FPRArg;

#elif LJ_TARGET_PPC
Expand Down
18 changes: 11 additions & 7 deletions src/lj_ccallback.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,16 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
MSize slot;
*p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4);
*p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5);
*p++ = A64I_BR | A64F_N(RID_X11);
*p++ = A64I_NOP;
*p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
*p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
*p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
*p++ = A64I_LE(A64I_NOP);
((void **)p)[0] = target;
((void **)p)[1] = g;
p += 4;
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
*p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot);
*p = A64I_B | A64F_S26((page-p) & 0x03ffffffu);
*p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
*p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
p++;
}
lua_assert(p - page <= CALLBACK_MCODE_SIZE);
Expand Down Expand Up @@ -623,6 +623,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
#if CCALL_NUM_FPR
if (ctype_isfp(ctr->info))
dp = (uint8_t *)&cts->cb.fpr[0];
#endif
#if LJ_TARGET_ARM64 && LJ_BE
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
dp = (uint8_t *)&cts->cb.fpr[0].f[1];
#endif
lj_cconv_ct_tv(cts, ctr, dp, o, 0);
#ifdef CALLBACK_HANDLE_RET
Expand All @@ -637,7 +641,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
#if LJ_TARGET_MIPS64
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
Expand Down
2 changes: 1 addition & 1 deletion src/lj_emit_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
} else {
goto nopair;
}
if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) {
if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
*as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
(ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
return;
Expand Down
9 changes: 8 additions & 1 deletion src/lj_target_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ typedef struct {
/* Return the address of a per-trace exit stub. */
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{
while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */
while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
return p + 3 + exitno;
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
Expand All @@ -116,6 +116,13 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)

/* -- Instructions -------------------------------------------------------- */

/* Convert an instruction word between its value and its in-memory form.
** ARM64 instructions are always little-endian: little-endian builds pass
** the word through unchanged, big-endian builds byte-swap it.
*/
#if !LJ_BE
#define A64I_LE(x) (x)
#else
#define A64I_LE(x) (lj_bswap(x))
#endif

/* Instruction fields. */
#define A64F_D(r) (r)
#define A64F_N(r) ((r) << 5)
Expand Down
Loading

0 comments on commit 3143b21

Please sign in to comment.