Skip to content

Commit

Permalink
Merge pull request #4210 from degasus/arm
Browse files Browse the repository at this point in the history
JitArm64: Small cleanup + speedups.
  • Loading branch information
degasus committed Sep 27, 2016
2 parents 0cb09ee + 732e0ff commit 3696c2b
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 80 deletions.
9 changes: 5 additions & 4 deletions Source/Core/Common/Arm64Emitter.cpp
Expand Up @@ -3,6 +3,7 @@
// Refer to the license.txt file included.

#include <algorithm>
#include <array>
#include <cstring>
#include <vector>

Expand Down Expand Up @@ -200,10 +201,10 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
// To repeat a value every d bits, we multiply it by a number of the form
// (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
// be derived using a table lookup on CLZ(d).
static const std::array<uint64_t, 6> multipliers = {
0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL,
0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
};
static const std::array<uint64_t, 6> multipliers = {{
0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL,
0x1111111111111111UL, 0x5555555555555555UL,
}};

int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;

Expand Down
40 changes: 20 additions & 20 deletions Source/Core/Core/PowerPC/JitArm64/Jit.cpp
Expand Up @@ -120,6 +120,26 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
SetJumpTarget(c);
}
}

if (jo.memcheck && (js.op->opinfo->flags & FL_LOADSTORE))
{
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));

FixupBranch handleException = B();
SwitchToFarCode();
SetJumpTarget(handleException);

gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);

WriteExceptionExit(js.compilerPC);

SwitchToNearCode();
SetJumpTarget(noException);
gpr.Unlock(WA);
}
}

void JitArm64::HLEFunction(UGeckoInstruction inst)
Expand Down Expand Up @@ -598,26 +618,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
// If we have a register that will never be used again, flush it.
gpr.StoreRegisters(~ops[i].gprInUse);
fpr.StoreRegisters(~ops[i].fprInUse);

if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
{
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));

FixupBranch handleException = B();
SwitchToFarCode();
SetJumpTarget(handleException);

gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);

WriteExceptionExit(js.compilerPC);

SwitchToNearCode();
SetJumpTarget(noException);
gpr.Unlock(WA);
}
}

i += js.skipInstructions;
Expand Down
7 changes: 2 additions & 5 deletions Source/Core/Core/PowerPC/JitArm64/Jit.h
Expand Up @@ -238,9 +238,6 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
void ComputeCarry(bool Carry);
void ComputeCarry();

typedef u32 (*Operation)(u32, u32);
void reg_imm(u32 d, u32 a, u32 value, Operation do_op,
void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg,
ArithOption),
bool Rc = false);
void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc = false);
};
62 changes: 24 additions & 38 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
Expand Up @@ -75,25 +75,8 @@ void JitArm64::ComputeCarry()
gpr.Unlock(WA);
}

// Following static functions are used in conjunction with reg_imm
// Returns the bitwise OR of the two operands.
static u32 Or(u32 a, u32 b)
{
  const u32 combined = a | b;
  return combined;
}

// Returns the bitwise AND of the two operands.
static u32 And(u32 a, u32 b)
{
  const u32 masked = a & b;
  return masked;
}

// Returns the bitwise exclusive-OR of the two operands.
static u32 Xor(u32 a, u32 b)
{
  const u32 toggled = a ^ b;
  return toggled;
}

void JitArm64::reg_imm(u32 d, u32 a, u32 value, Operation do_op,
void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, ARM64Reg, ArithOption),
bool Rc)
void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc)
{
if (gpr.IsImm(a))
{
Expand All @@ -105,8 +88,7 @@ void JitArm64::reg_imm(u32 d, u32 a, u32 value, Operation do_op,
{
gpr.BindToRegister(d, d == a);
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, value);
(this->*op)(gpr.R(d), gpr.R(a), WA, ArithOption(WA, ST_LSL, 0));
(this->*op)(gpr.R(d), gpr.R(a), value, WA);
gpr.Unlock(WA);

if (Rc)
Expand All @@ -128,22 +110,23 @@ void JitArm64::arith_imm(UGeckoInstruction inst)
// NOP
return;
}
reg_imm(a, s, inst.UIMM, Or, &ARM64XEmitter::ORR);
reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a | b; }, &ARM64XEmitter::ORRI2R);
break;
case 25: // oris
reg_imm(a, s, inst.UIMM << 16, Or, &ARM64XEmitter::ORR);
reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a | b; }, &ARM64XEmitter::ORRI2R);
break;
case 28: // andi
reg_imm(a, s, inst.UIMM, And, &ARM64XEmitter::AND, true);
reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a & b; }, &ARM64XEmitter::ANDI2R, true);
break;
case 29: // andis
reg_imm(a, s, inst.UIMM << 16, And, &ARM64XEmitter::AND, true);
reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a & b; }, &ARM64XEmitter::ANDI2R,
true);
break;
case 26: // xori
reg_imm(a, s, inst.UIMM, Xor, &ARM64XEmitter::EOR);
reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a ^ b; }, &ARM64XEmitter::EORI2R);
break;
case 27: // xoris
reg_imm(a, s, inst.UIMM << 16, Xor, &ARM64XEmitter::EOR);
reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a ^ b; }, &ARM64XEmitter::EORI2R);
break;
}
}
Expand Down Expand Up @@ -272,37 +255,37 @@ void JitArm64::boolX(UGeckoInstruction inst)
gpr.BindToRegister(a, (a == s) || (a == b));
if (inst.SUBOP10 == 28) // andx
{
AND(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
AND(gpr.R(a), gpr.R(s), gpr.R(b));
}
else if (inst.SUBOP10 == 476) // nandx
{
AND(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
AND(gpr.R(a), gpr.R(s), gpr.R(b));
MVN(gpr.R(a), gpr.R(a));
}
else if (inst.SUBOP10 == 60) // andcx
{
BIC(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
BIC(gpr.R(a), gpr.R(s), gpr.R(b));
}
else if (inst.SUBOP10 == 444) // orx
{
ORR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
ORR(gpr.R(a), gpr.R(s), gpr.R(b));
}
else if (inst.SUBOP10 == 124) // norx
{
ORR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
ORR(gpr.R(a), gpr.R(s), gpr.R(b));
MVN(gpr.R(a), gpr.R(a));
}
else if (inst.SUBOP10 == 412) // orcx
{
ORN(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
ORN(gpr.R(a), gpr.R(s), gpr.R(b));
}
else if (inst.SUBOP10 == 316) // xorx
{
EOR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
EOR(gpr.R(a), gpr.R(s), gpr.R(b));
}
else if (inst.SUBOP10 == 284) // eqvx
{
EON(gpr.R(a), gpr.R(b), gpr.R(s), ArithOption(gpr.R(a), ST_LSL, 0));
EON(gpr.R(a), gpr.R(b), gpr.R(s));
}
else
{
Expand Down Expand Up @@ -418,7 +401,7 @@ void JitArm64::negx(UGeckoInstruction inst)
else
{
gpr.BindToRegister(d, d == a);
SUB(gpr.R(d), WSP, gpr.R(a), ArithOption(gpr.R(a), ST_LSL, 0));
SUB(gpr.R(d), WSP, gpr.R(a));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
}
Expand Down Expand Up @@ -692,8 +675,11 @@ void JitArm64::addic(UGeckoInstruction inst)
else
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, imm);
ADDS(gpr.R(d), gpr.R(a), WA);
MOVI2R(WA, std::abs(simm));
if (simm < 0)
SUBS(gpr.R(d), gpr.R(a), WA);
else
ADDS(gpr.R(d), gpr.R(a), WA);
gpr.Unlock(WA);
}

Expand Down
25 changes: 12 additions & 13 deletions Source/Core/VideoCommon/VertexLoaderARM64.cpp
Expand Up @@ -9,19 +9,18 @@

using namespace Arm64Gen;

// File-scope names for specific ARM64 registers (X0, W2, ... are resolved
// through the `using namespace Arm64Gen;` directive above).
// NOTE(review): the code that uses these names is outside this view; the roles
// are inferred from the identifiers only — confirm against the usages.
// NOTE(review): these are mutable, non-static globals; nothing visible here
// reassigns them — verify whether they could be made constant.
ARM64Reg src_reg = X0;
ARM64Reg dst_reg = X1;
ARM64Reg count_reg = W2;
ARM64Reg skipped_reg = W17;
// Scratch registers, numbered downwards from W16.
ARM64Reg scratch1_reg = W16;
ARM64Reg scratch2_reg = W15;
ARM64Reg scratch3_reg = W14;
ARM64Reg scratch4_reg = W13;
ARM64Reg saved_count = W12;

// Presumably X-register (64-bit) names because they hold addresses/strides —
// TODO confirm.
ARM64Reg stride_reg = X11;
ARM64Reg arraybase_reg = X10;
ARM64Reg scale_reg = X9;
// Compile-time names for specific ARM64 registers (X0, W2, ... are resolved
// through the `using namespace Arm64Gen;` directive above).
// NOTE(review): the code that uses these names is outside this view; the roles
// are inferred from the identifiers only — confirm against the usages.
constexpr ARM64Reg src_reg = X0;
constexpr ARM64Reg dst_reg = X1;
constexpr ARM64Reg count_reg = W2;
constexpr ARM64Reg skipped_reg = W17;
// Scratch registers, numbered downwards from W16.
constexpr ARM64Reg scratch1_reg = W16;
constexpr ARM64Reg scratch2_reg = W15;
constexpr ARM64Reg scratch3_reg = W14;
constexpr ARM64Reg saved_count = W12;

// Presumably X-register (64-bit) names because they hold addresses/strides —
// TODO confirm.
constexpr ARM64Reg stride_reg = X11;
constexpr ARM64Reg arraybase_reg = X10;
constexpr ARM64Reg scale_reg = X9;

alignas(16) static const float scale_factors[] = {
1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3),
Expand Down

0 comments on commit 3696c2b

Please sign in to comment.