Skip to content

Commit

Permalink
Merge pull request LuaJIT#5 from sindrom91/aarch64-v2.1-new
Browse files Browse the repository at this point in the history
Aarch64 v2.1 new
  • Loading branch information
cbaylis committed Jun 21, 2016
2 parents d5bda33 + a3f2aec commit d46bf62
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 99 deletions.
27 changes: 20 additions & 7 deletions src/lj_asm_arm64.h
Expand Up @@ -272,6 +272,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
int stfp = (st == IRT_NUM || st == IRT_FLOAT);
IRRef lref = ir->op1;
/* 64 bit integer conversions are handled by SPLIT. */
/* TODO: 64-bit conversions should be handled here? */
lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64));
lua_assert(irt_type(ir->t) != st);
if (irt_isfp(ir->t)) {
Expand Down Expand Up @@ -583,6 +584,11 @@ static void asm_sload(ASMState *as, IRIns *ir)
asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
emit_opk(as, A64I_CMNx, 0, type, -irt_toitype(ir->t), allow);
}

if (ra_hasreg(type)) {
emit_dn(as, A64I_ASRx|A64F_IR(47), type, dest);
}

if (ra_hasreg(dest)) {
if (t == IRT_NUM) {
if (check_offset(A64I_LDRd, ofs) != OFS_INVALID) {
Expand All @@ -597,7 +603,6 @@ static void asm_sload(ASMState *as, IRIns *ir)
} else
emit_lso(as, A64I_LDRx, dest, base, ofs); /* !!!!TODO w or x */
}
if (ra_hasreg(type)) emit_lso(as, A64I_LDRx, type, base, ofs+4);
}

/* -- Allocations --------------------------------------------------------- */
Expand Down Expand Up @@ -635,7 +640,9 @@ static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)

/*
** Emit a unary floating-point instruction `ai` for IR instruction `ir`.
** The destination comes from ra_dest() and the operand (ir->op1) from
** ra_hintalloc() with the destination as hint; both use RSET_FPR.
*/
static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
{
/* NOTE(review): the hunk header (-635,7 +640,9) implies one line of this body
** is the removed pre-commit stub -- presumably this lua_unimpl() call. */
lua_unimpl();
Reg dest = ra_dest(as, ir, RSET_FPR);
Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
/* Only the low 5 bits of each register id are passed on -- presumably FPR ids
** are numbered above the GPR ids; confirm against the Reg definitions. */
emit_dn(as, ai, (dest & 31), (left & 31));
}

static void asm_callround(ASMState *as, IRIns *ir, int id)
Expand Down Expand Up @@ -703,7 +710,9 @@ static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)

/*
** Emit an integer negate-style instruction `ai` for IR instruction `ir`.
** The destination comes from ra_dest() and the operand (ir->op1) from
** ra_hintalloc() with the destination as hint; both use RSET_GPR.
*/
static void asm_intneg(ASMState *as, IRIns *ir, A64Ins ai)
{
/* NOTE(review): the hunk header (-703,7 +710,9) implies one line of this body
** is the removed pre-commit stub -- presumably this lua_unimpl() call. */
lua_unimpl();
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
/* emit_dm() merges A64F_D(dest) and A64F_M(left) into `ai`; the N field is
** expected to be preset by the opcode constant itself. */
emit_dm(as, ai, dest, left);
}

/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
Expand Down Expand Up @@ -754,19 +763,23 @@ static void asm_mul(ASMState *as, IRIns *ir)
/* The *OV variants expand to the plain sub/mul assemblers. */
#define asm_subov(as, ir) asm_sub(as, ir)
#define asm_mulov(as, ir) asm_mul(as, ir)

#if !LJ_SOFTFP
/* NOTE(review): asm_div is defined twice in this rendering; the first line
** (opcode 0) is presumably the pre-commit definition replaced by the second. */
#define asm_div(as, ir) asm_fparith(as, ir, /*ARMI_VDIV_D*/0)
#define asm_div(as, ir) asm_fparith(as, ir, A64I_FDIVd)
#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
/* NOTE(review): asm_abs still hands a literal 0 opcode to asm_fpunary(). */
#define asm_abs(as, ir) asm_fpunary(as, ir, /*ARMI_VABS_D*/0)
#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
#endif

/* Modulo is delegated to the lj_vm_modi call. */
#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)

/*
** Assemble a negation. When !LJ_SOFTFP, number-typed results go through
** asm_fpunary() with A64I_FNEGd; everything else goes through asm_intneg()
** with A64I_NEGx.
*/
static void asm_neg(ASMState *as, IRIns *ir)
{
/* NOTE(review): the hunk header (-754,19 +763,23) implies this body mixes
** removed and added lines -- presumably this lua_unimpl() call is the removed
** pre-commit stub. */
lua_unimpl();
#if !LJ_SOFTFP
if (irt_isnum(ir->t)) {
asm_fpunary(as, ir, A64I_FNEGd);
return;
}
#endif
asm_intneg(as, ir, A64I_NEGx);
}

static void asm_bitop(ASMState *as, IRIns *ir, A64Ins ai)
Expand Down
200 changes: 109 additions & 91 deletions src/lj_emit_arm64.h
Expand Up @@ -16,6 +16,30 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
#define emit_getgl(as, r, field) \
emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)

/*
** Emit one instruction word: `ai` merged with A64F_N(rn).
** The word is stored in the slot directly below the current as->mcp.
*/
static void emit_n(ASMState *as, A64Ins ai, Reg rn)
{
  as->mcp--;
  *as->mcp = ai | A64F_N(rn);
}

/*
** Emit one instruction word: `ai` merged with A64F_D(rd) and A64F_N(rn).
** The word is stored in the slot directly below the current as->mcp.
*/
static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
{
  as->mcp--;
  *as->mcp = ai
           | A64F_D(rd)
           | A64F_N(rn);
}

/*
** Emit one instruction word: `ai` merged with A64F_D(rd) and A64F_M(rm).
** The word is stored in the slot directly below the current as->mcp.
*/
static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
{
  as->mcp--;
  *as->mcp = ai
           | A64F_D(rd)
           | A64F_M(rm);
}

/*
** Emit one instruction word: `ai` merged with A64F_N(rn) and A64F_M(rm).
** The word is stored in the slot directly below the current as->mcp.
*/
static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
{
  as->mcp--;
  *as->mcp = ai
           | A64F_N(rn)
           | A64F_M(rm);
}

/*
** Emit one instruction word: `ai` merged with A64F_D(rd), A64F_N(rn) and
** A64F_M(rm). The word is stored in the slot directly below as->mcp.
*/
static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
{
  as->mcp--;
  *as->mcp = ai
           | A64F_D(rd)
           | A64F_N(rn)
           | A64F_M(rm);
}

/* Encode constant in K12 format for data processing instructions. */
static uint32_t emit_isk12(A64Ins ai, int32_t n)
Expand All @@ -31,6 +55,72 @@ static uint32_t emit_isk12(A64Ins ai, int32_t n)
return -1;
}

/* -- Emit loads/stores --------------------------------------------------- */

/*
** Classification of a load/store offset, as returned by check_offset().
** The OFS_SCALED_n values must stay consecutive: check_offset() computes
** OFS_SCALED_0 + scale and emit_lso() recovers the scale by subtraction.
*/
typedef enum {
  OFS_INVALID  = 0,  /* Offset cannot be encoded. */
  OFS_UNSCALED = 1,  /* Needs the unscaled instruction variant. */
  OFS_SCALED_0 = 2,  /* Scaled form, offset shifted right by 0. */
  OFS_SCALED_1 = 3,  /* Scaled form, offset shifted right by 1. */
  OFS_SCALED_2 = 4,  /* Scaled form, offset shifted right by 2. */
  OFS_SCALED_3 = 5   /* Scaled form, offset shifted right by 3. */
} ofs_type;

/*
** Classify the offset `ofs` for the load/store instruction `ai`.
**
** Returns OFS_SCALED_<n> when ofs is non-negative, a multiple of the access
** size (1 << n) and fits the 12-bit scaled immediate, i.e. at most
** 4095 << n. Returns OFS_UNSCALED when ofs is negative or unaligned but lies
** in -256..255 (the unscaled variant, e.g. ldur). Otherwise returns
** OFS_INVALID.
**
** NOTE(review): A64I_LDRs/A64I_STRs (emit_loadofs passes A64I_LDRs) are not
** listed below and therefore take the default path.
*/
static ofs_type check_offset(A64Ins ai, int32_t ofs)
{
  int scale;
  switch (ai) {
  case A64I_LDRBw: case A64I_STRBw:
    scale = 0;
    break;
  case A64I_LDRHw: case A64I_STRHw:
    scale = 1;
    break;
  case A64I_LDRw: case A64I_STRw:
    scale = 2;
    break;
  case A64I_LDRx: case A64I_STRx:
  case A64I_LDRd: case A64I_STRd:
    scale = 3;
    break;
  default:
    lua_assert(!"invalid instruction in check_offset");
    /* Without assertions, never continue with an uninitialized scale. */
    return OFS_INVALID;
  }

  /* Negative or unaligned offsets need the unscaled variant (e.g. ldur). */
  if (ofs < 0 || (ofs & ((1<<scale)-1)))
    return (ofs >= -256 && ofs <= 255) ? OFS_UNSCALED : OFS_INVALID;

  /* The scaled immediate is 12 bits wide: the largest field value is 4095. */
  return (ofs <= (4095<<scale)) ? (ofs_type)(OFS_SCALED_0 + scale) : OFS_INVALID;
}

/*
** Emit the load/store `ai` for register rd with base register rn and byte
** offset ofs. The offset must be accepted by check_offset() (asserted).
*/
static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int32_t ofs)
{
  /* TODO: the ARM emit_lso combines LDR/STR pairs into LDRD/STRD; something
  ** similar may be possible here. */
  ofs_type ot = check_offset(ai, ofs);
  lua_assert(ot != OFS_INVALID);
  if (ot != OFS_UNSCALED) {
    /* Scaled form: the field holds the offset divided by the access size. */
    int32_t imm = ofs >> (ot - OFS_SCALED_0);
    *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_A(imm);
    return;
  }
  /* Unscaled form: flip to the unscaled opcode, keep the low 9 offset bits. */
  ai ^= A64I_LS_U;
  *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_A_U(ofs & 0x1ff);
}

/*
** Emit the load/store `ai` for register r at the absolute address p, using
** RID_TMP as the base register.
** Instruction words are stored at decreasing as->mcp addresses, so the three
** MOVK words written below end up at lower addresses than the load/store.
*/
static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
{
int64_t i = i64ptr(p);
Reg tmp = RID_TMP; /*!!!TODO allocate register? */
/* The low 16 address bits are passed as the load/store offset.
** NOTE(review): check_offset() rejects many values in 0..0xffff and emit_lso()
** asserts on them -- confirm callers only pass suitable addresses. */
emit_lso(as, ai, r, tmp, (i & 0xffff));
/* Address bits 48..63, 32..47 and 16..31 become the MOVK immediates for tmp.
** NOTE(review): nothing emitted here writes the low 16 bits of tmp;
** presumably they must already be zero for the address to be right -- confirm. */
*--as->mcp = A64I_MOVK_48x | A64F_D(tmp) | A64F_U16((i>>48) & 0xffff);
*--as->mcp = A64I_MOVK_32x | A64F_D(tmp) | A64F_U16((i>>32) & 0xffff);
*--as->mcp = A64I_MOVK_16x | A64F_D(tmp) | A64F_U16((i>>16) & 0xffff);

}

/* Load a 32 bit constant into a GPR. */
static void emit_loadi(ASMState *as, Reg rd, int32_t i)
{
Expand Down Expand Up @@ -60,7 +150,14 @@ static void emit_loadu64(ASMState *as, Reg rd, uint64_t u64)
/*
** Generic load of register with base and (small) offset address.
** With LJ_SOFTFP the IR type must not be a number and A64I_LDRx is always
** used. Otherwise registers with id >= RID_MAX_GPR are loaded with A64I_LDRd
** (number type) or A64I_LDRs, and all other registers with A64I_LDRx.
*/
static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
/* NOTE(review): this rendering mixes removed and added diff lines; this
** lua_unimpl() call is presumably the removed pre-commit stub. */
lua_unimpl();
#if LJ_SOFTFP
lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
#else
if (r >= RID_MAX_GPR)
/* NOTE(review): check_offset() in this file has no case for A64I_LDRs, so the
** non-number path would reach its default branch -- confirm. */
emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, r, base, ofs);
else
#endif
/* In the !LJ_SOFTFP build this statement is the body of the `else` above. */
emit_lso(as, A64I_LDRx, r, base, ofs);
}

/* Generic store of register with base and (small) offset address. */
Expand All @@ -72,32 +169,19 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
lua_unimpl();
}

static void emit_n(ASMState *as, A64Ins ai, Reg rn)
{
*--as->mcp = ai | A64F_N(rn);
}

static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
{
*--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
}
#if LJ_SOFTFP
lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
#else
if (dst >= RID_MAX_GPR) {
emit_dm(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
(dst & 31), (src & 31));
return;
}
#endif

static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
{
*--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
}
// TODO: add swapping early registers for loads/stores?

static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
{
*--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
}

static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
{
*--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
emit_dm(as, A64I_MOVx, dst, src);
}

/* Emit an arithmetic/logic operation with a constant operand. */
Expand Down Expand Up @@ -144,72 +228,6 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
emit_loadu64(as, r64, k);
}

/* -- Emit loads/stores --------------------------------------------------- */

/*
** Classification of a load/store offset, as returned by check_offset().
** The OFS_SCALED_n values must stay consecutive: check_offset() computes
** OFS_SCALED_0 + scale and emit_lso() recovers the scale by subtraction.
*/
typedef enum {
  OFS_INVALID  = 0,  /* Offset cannot be encoded. */
  OFS_UNSCALED = 1,  /* Needs the unscaled instruction variant. */
  OFS_SCALED_0 = 2,  /* Scaled form, offset shifted right by 0. */
  OFS_SCALED_1 = 3,  /* Scaled form, offset shifted right by 1. */
  OFS_SCALED_2 = 4,  /* Scaled form, offset shifted right by 2. */
  OFS_SCALED_3 = 5   /* Scaled form, offset shifted right by 3. */
} ofs_type;

/*
** Classify the offset `ofs` for the load/store instruction `ai`.
**
** Returns OFS_SCALED_<n> when ofs is non-negative, a multiple of the access
** size (1 << n) and fits the 12-bit scaled immediate, i.e. at most
** 4095 << n. Returns OFS_UNSCALED when ofs is negative or unaligned but lies
** in -256..255 (the unscaled variant, e.g. ldur). Otherwise returns
** OFS_INVALID.
**
** NOTE(review): A64I_LDRs/A64I_STRs (emit_loadofs passes A64I_LDRs) are not
** listed below and therefore take the default path.
*/
static ofs_type check_offset(A64Ins ai, int32_t ofs)
{
  int scale;
  switch (ai) {
  case A64I_LDRBw: case A64I_STRBw:
    scale = 0;
    break;
  case A64I_LDRHw: case A64I_STRHw:
    scale = 1;
    break;
  case A64I_LDRw: case A64I_STRw:
    scale = 2;
    break;
  case A64I_LDRx: case A64I_STRx:
  case A64I_LDRd: case A64I_STRd:
    scale = 3;
    break;
  default:
    lua_assert(!"invalid instruction in check_offset");
    /* Without assertions, never continue with an uninitialized scale. */
    return OFS_INVALID;
  }

  /* Negative or unaligned offsets need the unscaled variant (e.g. ldur). */
  if (ofs < 0 || (ofs & ((1<<scale)-1)))
    return (ofs >= -256 && ofs <= 255) ? OFS_UNSCALED : OFS_INVALID;

  /* The scaled immediate is 12 bits wide: the largest field value is 4095. */
  return (ofs <= (4095<<scale)) ? (ofs_type)(OFS_SCALED_0 + scale) : OFS_INVALID;
}

/*
** Emit the load/store `ai` for register rd with base register rn and byte
** offset ofs. The offset must be accepted by check_offset() (asserted).
*/
static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int32_t ofs)
{
  /* TODO: the ARM emit_lso combines LDR/STR pairs into LDRD/STRD; something
  ** similar may be possible here. */
  ofs_type ot = check_offset(ai, ofs);
  lua_assert(ot != OFS_INVALID);
  if (ot != OFS_UNSCALED) {
    /* Scaled form: the field holds the offset divided by the access size. */
    int32_t imm = ofs >> (ot - OFS_SCALED_0);
    *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_A(imm);
    return;
  }
  /* Unscaled form: flip to the unscaled opcode, keep the low 9 offset bits. */
  ai ^= A64I_LS_U;
  *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_A_U(ofs & 0x1ff);
}

/*
** Emit the load/store `ai` for register r at the absolute address p, using
** RID_TMP as the base register.
** Instruction words are stored at decreasing as->mcp addresses, so the three
** MOVK words written below end up at lower addresses than the load/store.
*/
static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
{
int64_t i = i64ptr(p);
Reg tmp = RID_TMP; /*!!!TODO allocate register? */
/* The low 16 address bits are passed as the load/store offset.
** NOTE(review): check_offset() rejects many values in 0..0xffff and emit_lso()
** asserts on them -- confirm callers only pass suitable addresses. */
emit_lso(as, ai, r, tmp, (i & 0xffff));
/* Address bits 48..63, 32..47 and 16..31 become the MOVK immediates for tmp.
** NOTE(review): nothing emitted here writes the low 16 bits of tmp;
** presumably they must already be zero for the address to be right -- confirm. */
*--as->mcp = A64I_MOVK_48x | A64F_D(tmp) | A64F_U16((i>>48) & 0xffff);
*--as->mcp = A64I_MOVK_32x | A64F_D(tmp) | A64F_U16((i>>32) & 0xffff);
*--as->mcp = A64I_MOVK_16x | A64F_D(tmp) | A64F_U16((i>>16) & 0xffff);

}

/* -- Emit control-flow instructions -------------------------------------- */

static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
Expand Down
9 changes: 8 additions & 1 deletion src/lj_target_arm64.h
Expand Up @@ -196,9 +196,12 @@ typedef enum A64Ins {
A64I_SUBd = 0x7ee08400,
A64I_FMADDd = 0x1f400000,
A64I_FMULd = 0x1e600800,
A64I_FMULs = 0x1e200800,
A64I_FDIVd = 0x1e601800,
A64I_FNEGd = 0x1e614000,
A64I_STRd = 0xfd000000, /* str d0,[x0] */
A64I_LDRd = 0xfd400000, /* ldr d0,[x0] */
A64I_STRs = 0xbd000000, /* str s0,[x0] */
A64I_LDRs = 0xbd400000, /* ldr s0,[x0] */

A64I_FCVT_F32_F64 = 0x1e624000,
A64I_FCVT_F64_F32 = 0x1e22c000,
Expand All @@ -211,6 +214,8 @@ typedef enum A64Ins {
A64I_FCVT_U32_F64 = 0x1e650000,
A64I_FCVT_U32_F32 = 0x1e250000,

A64I_FMOV_S = 0x1e200000,
A64I_FMOV_D = 0x1e600000,
A64I_FMOV_R_S = 0x1e260000,
A64I_FMOV_S_R = 0x1e270000,
A64I_FMOV_R_D = 0x9e660000,
Expand All @@ -221,6 +226,8 @@ typedef enum A64Ins {
A64I_CMPx = A64I_SUBSx | A64F_D (RID_ZERO),
A64I_CMNx = A64I_ADDSx | A64F_D (RID_ZERO),
A64I_CMNw = A64I_ADDSw | A64F_D (RID_ZERO),
A64I_NEGw = A64I_SUBw | A64F_N (RID_ZERO),
A64I_NEGx = A64I_SUBx | A64F_N (RID_ZERO),

/* fields */
A64I_BINOPk = 0x1a000000, /* A64I_ADDx^A64I_BINOPk => ADD x0,x0,0 */
Expand Down

0 comments on commit d46bf62

Please sign in to comment.