
Merge pull request #10471 from unknownbrackets/arm64-jit

More arm64jit improvements
hrydgard committed Dec 30, 2017
2 parents 364a266 + 0fc8274 commit eb35e88288c9566da192f3c5f4a88da6accb4037
Common/Arm64Emitter.cpp
@@ -1547,6 +1547,10 @@ void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift)
 {
 	EncodeAddSubImmInst(1, true, shift, imm, Rn, Is64Bit(Rn) ? SP : WSP);
 }
+void ARM64XEmitter::CMN(ARM64Reg Rn, u32 imm, bool shift)
+{
+	EncodeAddSubImmInst(0, true, shift, imm, Rn, Is64Bit(Rn) ? SP : WSP);
+}
 
 // Data Processing (Immediate)
 void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos)
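The new CMN helper is encoded exactly like the CMP above it; the only difference is the first argument to EncodeAddSubImmInst (0 rather than 1), which presumably selects the ADDS rather than the SUBS form. CMN Rn, #imm sets flags from Rn + imm, so it behaves like CMP Rn, #-imm; that equivalence is what the reworked TryCMPI2R further down exploits. A standalone sketch of the idea, shown for the Z flag on a 32-bit register (illustration, not code from the commit):

#include <cassert>
#include <cstdint>

// CMP Rn, #imm sets Z iff Rn == imm; CMN Rn, #imm sets Z iff Rn + imm == 0.
static bool CmpSetsZ(uint32_t rn, int32_t imm) { return rn == (uint32_t)imm; }
static bool CmnSetsZ(uint32_t rn, uint32_t imm) { return rn + imm == 0; }

int main() {
    // A compare against a negative constant can be emitted as CMN with
    // the positive constant: the flags come out the same.
    for (uint32_t x = 0; x < 100; ++x)
        assert(CmpSetsZ(x, -5) == CmnSetsZ(x, 5));
    return 0;
}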
@@ -3664,56 +3668,42 @@ void ARM64FloatEmitter::ABI_PopRegisters(uint32_t registers, uint32_t fp_registers)
 }
 
 void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
-	unsigned int n, imm_s, imm_r;
+	// It's probably okay to AND by extra bits.
 	if (!Is64Bit(Rn))
 		imm &= 0xFFFFFFFF;
-	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
-		AND(Rd, Rn, imm_r, imm_s, n != 0);
-	} else if (imm == 0) {
-		MOVI2R(Rd, 0);
-	} else {
+	if (!TryANDI2R(Rd, Rn, imm)) {
 		_assert_msg_(JIT, scratch != INVALID_REG, "ANDI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		AND(Rd, Rn, scratch);
 	}
 }
 
 void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
-	unsigned int n, imm_s, imm_r;
-	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
-		ORR(Rd, Rn, imm_r, imm_s, n != 0);
-	} else if (imm == 0) {
-		if (Rd != Rn) {
-			MOV(Rd, Rn);
-		}
-	} else {
+	_assert_msg_(JIT, Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "ORRI2R - more bits in imm than Rn");
+	if (!TryORRI2R(Rd, Rn, imm)) {
 		_assert_msg_(JIT, scratch != INVALID_REG, "ORRI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		ORR(Rd, Rn, scratch);
 	}
 }
 
 void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
-	unsigned int n, imm_s, imm_r;
-	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
-		EOR(Rd, Rn, imm_r, imm_s, n != 0);
-	} else if (imm == 0) {
-		if (Rd != Rn) {
-			MOV(Rd, Rn);
-		}
-	} else {
+	_assert_msg_(JIT, Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "EORI2R - more bits in imm than Rn");
+	if (!TryEORI2R(Rd, Rn, imm)) {
 		_assert_msg_(JIT, scratch != INVALID_REG, "EORI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		EOR(Rd, Rn, scratch);
 	}
 }
 
 void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
+	if (!Is64Bit(Rn))
+		imm &= 0xFFFFFFFF;
 	unsigned int n, imm_s, imm_r;
 	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
 		ANDS(Rd, Rn, imm_r, imm_s, n != 0);
 	} else if (imm == 0) {
-		ANDS(Rd, Rn, Is64Bit(Rn) ? ZR : WZR, ArithOption(Rd, ST_LSL, 0));
+		ANDS(Rd, Rn, Is64Bit(Rn) ? ZR : WZR);
 	} else {
 		_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
@@ -3722,77 +3712,79 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 }
 
 void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
-	u32 val;
-	bool shift;
-	if (IsImmArithmetic(imm, &val, &shift)) {
-		ADD(Rd, Rn, val, shift);
-	} else {
+	if (!TryADDI2R(Rd, Rn, imm)) {
 		_assert_msg_(JIT, scratch != INVALID_REG, "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		ADD(Rd, Rn, scratch);
 	}
 }
 
 void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
-	u32 val;
-	bool shift;
-	if (IsImmArithmetic(imm, &val, &shift)) {
-		SUB(Rd, Rn, val, shift);
-	} else {
+	if (!TrySUBI2R(Rd, Rn, imm)) {
 		_assert_msg_(JIT, scratch != INVALID_REG, "SUBI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		SUB(Rd, Rn, scratch);
 	}
 }
 
 void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
-	u32 val;
-	bool shift;
-	if (IsImmArithmetic(imm, &val, &shift)) {
-		CMP(Rn, val, shift);
-	} else {
+	if (!TryCMPI2R(Rn, imm)) {
 		_assert_msg_(JIT, scratch != INVALID_REG, "CMPI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		CMP(Rn, scratch);
 	}
 }
 
-bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
+bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
 	u32 val;
 	bool shift;
-	if (IsImmArithmetic(imm, &val, &shift)) {
+	if (imm == 0) {
+		// Prefer MOV (ORR) instead of ADD for moves.
+		MOV(Rd, Rn);
+		return true;
+	} else if (IsImmArithmetic(imm, &val, &shift)) {
 		ADD(Rd, Rn, val, shift);
 		return true;
 	} else {
 		return false;
 	}
 }
 
-bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
+bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
 	u32 val;
 	bool shift;
-	if (IsImmArithmetic(imm, &val, &shift)) {
+	if (imm == 0) {
+		// Prefer MOV (ORR) instead of ADD for moves.
+		MOV(Rd, Rn);
+		return true;
+	} else if (IsImmArithmetic(imm, &val, &shift)) {
 		SUB(Rd, Rn, val, shift);
 		return true;
 	} else {
 		return false;
 	}
 }
 
-bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u32 imm) {
+bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u64 imm) {
+	s64 negated = Is64Bit(Rn) ? -(s64)imm : -(s32)(u32)imm;
 	u32 val;
 	bool shift;
 	if (IsImmArithmetic(imm, &val, &shift)) {
 		CMP(Rn, val, shift);
 		return true;
+	} else if (IsImmArithmetic((u64)negated, &val, &shift)) {
+		CMN(Rn, val, shift);
+		return true;
 	} else {
 		return false;
 	}
 }
 
-bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
+bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
+	if (!Is64Bit(Rn))
+		imm &= 0xFFFFFFFF;
 	u32 n, imm_r, imm_s;
-	if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r)) {
+	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
 		AND(Rd, Rn, imm_r, imm_s, n != 0);
 		return true;
 	} else if (imm == 0) {
@@ -3802,9 +3794,10 @@ bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
 		return false;
 	}
 }
-bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
+bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
+	_assert_msg_(JIT, Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "TryORRI2R - more bits in imm than Rn");
 	u32 n, imm_r, imm_s;
-	if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r)) {
+	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
 		ORR(Rd, Rn, imm_r, imm_s, n != 0);
 		return true;
 	} else if (imm == 0) {
@@ -3816,9 +3809,10 @@ bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
 		return false;
 	}
 }
-bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
+bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
+	_assert_msg_(JIT, Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "TryEORI2R - more bits in imm than Rn");
 	u32 n, imm_r, imm_s;
-	if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r)) {
+	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
 		EOR(Rd, Rn, imm_r, imm_s, n != 0);
 		return true;
 	} else if (imm == 0) {
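Taken together, the emitter changes above turn every XXXI2R wrapper into a thin fallback around a TryXXXI2R probe, and widen the probes to u64 so they work for X as well as W registers. Callers that cannot spare a scratch register can now ask first and handle the failure themselves; note also that TryCMPI2R now retries with the negated immediate and the new CMN, so compares against small negative constants no longer need a scratch register at all. A hedged caller-side sketch (the function, register, and constant choices are illustrative, not code from this commit; assumes Arm64Emitter.h from this tree):

#include "Common/Arm64Emitter.h"

using namespace Arm64Gen;

void OrExample(ARM64XEmitter &emit) {
    // 0x12345678 is not a rotation of a repeating bit pattern, so it
    // cannot be encoded as an ARM64 logical immediate; the probe fails
    // and we materialize the constant, reusing the destination as scratch.
    if (!emit.TryORRI2R(W0, W1, 0x12345678)) {
        emit.MOVI2R(W0, 0x12345678);
        emit.ORR(W0, W1, W0);
    }
}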
Common/Arm64Emitter.h
@@ -591,6 +591,7 @@ class ARM64XEmitter
 	void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
 	void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
 	void CMP(ARM64Reg Rn, u32 imm, bool shift = false);
+	void CMN(ARM64Reg Rn, u32 imm, bool shift = false);
 
 	// Data Processing (Immediate)
 	void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
@@ -714,13 +715,13 @@ class ARM64XEmitter
 	void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
 	void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
 
-	bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
-	bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
-	bool TryCMPI2R(ARM64Reg Rn, u32 imm);
+	bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
+	bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
+	bool TryCMPI2R(ARM64Reg Rn, u64 imm);
 
-	bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
-	bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
-	bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+	bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
+	bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
+	bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
 
 	// Pseudo-instruction for convenience. PUSH pushes 16 bytes even though we only push a single register.
 	// This is so the stack pointer is always 16-byte aligned, which is checked by hardware!
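The prototype widening in this header is what forces the signature changes in the compiler below: parameter types are part of a pointer-to-member-function's type, so CompImmLogic and CompType3, which accept the Try* helpers as member function pointers, must change in lockstep. A minimal reduction of that constraint (types and names are simplified and hypothetical, not PPSSPP code):

// A member function pointer declared with a u32 parameter no longer
// binds once the target takes u64; the pointer type must match exactly.
struct Emitter {
    bool TryADDI2R(int rd, int rn, unsigned long long imm) { return imm == 0; }
};

int main() {
    // OK: the pointer's parameter list matches the new 64-bit signature.
    bool (Emitter::*probe)(int, int, unsigned long long) = &Emitter::TryADDI2R;
    Emitter e;
    return (e.*probe)(0, 1, 0ULL) ? 0 : 1;
}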
Core/MIPS/ARM64/Arm64CompALU.cpp
@@ -57,7 +57,7 @@ static u32 EvalAnd(u32 a, u32 b) { return a & b; }
 static u32 EvalAdd(u32 a, u32 b) { return a + b; }
 static u32 EvalSub(u32 a, u32 b) { return a - b; }
 
-void Arm64Jit::CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(ARM64Reg dst, ARM64Reg src, ARM64Reg src2), bool (ARM64XEmitter::*tryArithI2R)(ARM64Reg dst, ARM64Reg src, u32 val), u32 (*eval)(u32 a, u32 b)) {
+void Arm64Jit::CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(ARM64Reg dst, ARM64Reg src, ARM64Reg src2), bool (ARM64XEmitter::*tryArithI2R)(ARM64Reg dst, ARM64Reg src, u64 val), u32 (*eval)(u32 a, u32 b)) {
 	if (gpr.IsImm(rs)) {
 		gpr.SetImm(rt, (*eval)(gpr.GetImm(rs), uimm));
 	} else {
@@ -119,7 +119,7 @@ void Arm64Jit::Comp_IType(MIPSOpcode op) {
 			break;
 		}
 		gpr.MapDirtyIn(rt, rs);
-		if (!TryCMPI2R(gpr.R(rs), simm)) {
+		if (!TryCMPI2R(gpr.R(rs), (u32)simm)) {
 			gpr.SetRegImm(SCRATCH1, simm);
 			CMP(gpr.R(rs), SCRATCH1);
 		}
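The explicit (u32) cast at this call site matters because TryCMPI2R now takes u64: simm is the sign-extended 16-bit immediate, and converting a negative s32 straight to u64 sign-extends it into a 64-bit constant rather than the 32-bit pattern a W-register compare should see. A standalone illustration (the variable name is borrowed from the call site, everything else is hypothetical):

#include <cassert>
#include <cstdint>

int main() {
    int32_t simm = -16;
    // Straight conversion sign-extends into the high word...
    assert((uint64_t)simm == 0xFFFFFFFFFFFFFFF0ULL);
    // ...while casting through u32 keeps only the low 32 bits.
    assert((uint64_t)(uint32_t)simm == 0x00000000FFFFFFF0ULL);
    return 0;
}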
@@ -196,7 +196,7 @@ void Arm64Jit::Comp_RType2(MIPSOpcode op) {
 	}
 }
 
-void Arm64Jit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arith)(ARM64Reg dst, ARM64Reg rm, ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(ARM64Reg dst, ARM64Reg rm, u32 val), u32(*eval)(u32 a, u32 b), bool symmetric) {
+void Arm64Jit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arith)(ARM64Reg dst, ARM64Reg rm, ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(ARM64Reg dst, ARM64Reg rm, u64 val), u32(*eval)(u32 a, u32 b), bool symmetric) {
 	if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
 		gpr.SetImm(rd, (*eval)(gpr.GetImm(rs), gpr.GetImm(rt)));
 		return;
@@ -262,7 +262,16 @@ void Arm64Jit::Comp_RType3(MIPSOpcode op) {
 
 	case 32: //R(rd) = R(rs) + R(rt); break; //add
 	case 33: //R(rd) = R(rs) + R(rt); break; //addu
-		CompType3(rd, rs, rt, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd, true);
+		if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0 && !gpr.IsImm(rt)) {
+			// Special case: actually a mov, avoid arithmetic.
+			gpr.MapDirtyIn(rd, rt);
+			MOV(gpr.R(rd), gpr.R(rt));
+		} else if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0 && !gpr.IsImm(rs)) {
+			gpr.MapDirtyIn(rd, rs);
+			MOV(gpr.R(rd), gpr.R(rs));
+		} else {
+			CompType3(rd, rs, rt, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd, true);
+		}
 		break;
 
 	case 34: //R(rd) = R(rs) - R(rt); break; //sub
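Some context for this special case: MIPS has no dedicated register-move instruction, and assemblers expand the move pseudo-instruction to addu rd, rs, $zero, so adds against a known zero are extremely common in PSP code. When the register cache knows one operand is the constant 0, the add is a pure copy, and ARM64's MOV (an alias of ORR with the zero register) expresses that directly without probing the immediate paths — the same reasoning as the new imm == 0 fast path in TryADDI2R/TrySUBI2R above. A reference-semantics sketch, not JIT code:

#include <cassert>
#include <cstdint>

// addu degenerates to a register copy when either operand is zero.
static uint32_t EvalAddRef(uint32_t a, uint32_t b) { return a + b; }

int main() {
    uint32_t rs = 0x1337;
    assert(EvalAddRef(rs, 0) == rs);  // addu $rd, $rs, $zero == move $rd, $rs
    assert(EvalAddRef(0, rs) == rs);  // addu $rd, $zero, $rs
    return 0;
}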
@@ -392,13 +401,11 @@ void Arm64Jit::CompShiftVar(MIPSOpcode op, Arm64Gen::ShiftType shiftType) {
 		return;
 	}
 	gpr.MapDirtyInIn(rd, rs, rt);
-	// Not sure if ARM64 wraps like this so let's do it for it. (TODO: According to the ARM ARM, it will indeed mask for us so this is not necessary)
-	ANDI2R(SCRATCH1, gpr.R(rs), 0x1F, INVALID_REG);
 	switch (shiftType) {
-	case ST_LSL: LSLV(gpr.R(rd), gpr.R(rt), SCRATCH1); break;
-	case ST_LSR: LSRV(gpr.R(rd), gpr.R(rt), SCRATCH1); break;
-	case ST_ASR: ASRV(gpr.R(rd), gpr.R(rt), SCRATCH1); break;
-	case ST_ROR: RORV(gpr.R(rd), gpr.R(rt), SCRATCH1); break;
+	case ST_LSL: LSLV(gpr.R(rd), gpr.R(rt), gpr.R(rs)); break;
+	case ST_LSR: LSRV(gpr.R(rd), gpr.R(rt), gpr.R(rs)); break;
+	case ST_ASR: ASRV(gpr.R(rd), gpr.R(rt), gpr.R(rs)); break;
+	case ST_ROR: RORV(gpr.R(rd), gpr.R(rt), gpr.R(rs)); break;
 	}
 }
 
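The removed AND was defensive masking of the shift amount, and the old TODO already noted it was unnecessary: per the ARM ARM, the variable-shift instructions reduce the amount in Rm modulo the register width, so on W registers LSLV/LSRV/ASRV/RORV use only the low five bits of the source — exactly the rs & 0x1F semantics of MIPS sllv/srlv/srav. Dropping it saves an instruction and frees SCRATCH1. A standalone sketch of the equivalence (reference semantics, not JIT code):

#include <cassert>
#include <cstdint>

// MIPS sllv masks the shift amount to 5 bits; ARM64 LSLV on a W register
// shifts by Wm modulo 32. For unsigned values (x & 0x1F) == (x % 32), so
// the two agree for every shift amount and the explicit AND was redundant.
static uint32_t MipsSllv(uint32_t rt, uint32_t rs) { return rt << (rs & 0x1F); }
static uint32_t A64Lslv32(uint32_t wn, uint32_t wm) { return wn << (wm % 32); }

int main() {
    for (uint32_t amt = 0; amt < 256; ++amt)
        assert(MipsSllv(0x80000001u, amt) == A64Lslv32(0x80000001u, amt));
    return 0;
}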
