Merge pull request #4210 from degasus/arm

JitArm64: Small cleanup + speedups.
dolphin-emu · Sep 27, 2016 · 3696c2b
2 parents 0cb09ee + 732e0ff
commit 3696c2b
Show file tree

Hide file tree

Showing 5 changed files with 63 additions and 80 deletions.
diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <array>
 #include <cstring>
 #include <vector>
 
@@ -200,10 +201,10 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
   // To repeat a value every d bits, we multiply it by a number of the form
   // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
   // be derived using a table lookup on CLZ(d).
-  static const std::array<uint64_t, 6> multipliers = {
-      0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL,
-      0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
-  };
+  static const std::array<uint64_t, 6> multipliers = {{
+      0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL,
+      0x1111111111111111UL, 0x5555555555555555UL,
+  }};
 
   int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
 

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -120,6 +120,26 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
       SetJumpTarget(c);
     }
   }
+
+  if (jo.memcheck && (js.op->opinfo->flags & FL_LOADSTORE))
+  {
+    ARM64Reg WA = gpr.GetReg();
+    LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
+    FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));
+
+    FixupBranch handleException = B();
+    SwitchToFarCode();
+    SetJumpTarget(handleException);
+
+    gpr.Flush(FLUSH_MAINTAIN_STATE);
+    fpr.Flush(FLUSH_MAINTAIN_STATE);
+
+    WriteExceptionExit(js.compilerPC);
+
+    SwitchToNearCode();
+    SetJumpTarget(noException);
+    gpr.Unlock(WA);
+  }
 }
 
 void JitArm64::HLEFunction(UGeckoInstruction inst)
@@ -598,26 +618,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
       // If we have a register that will never be used again, flush it.
       gpr.StoreRegisters(~ops[i].gprInUse);
       fpr.StoreRegisters(~ops[i].fprInUse);
-
-      if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
-      {
-        ARM64Reg WA = gpr.GetReg();
-        LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
-        FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));
-
-        FixupBranch handleException = B();
-        SwitchToFarCode();
-        SetJumpTarget(handleException);
-
-        gpr.Flush(FLUSH_MAINTAIN_STATE);
-        fpr.Flush(FLUSH_MAINTAIN_STATE);
-
-        WriteExceptionExit(js.compilerPC);
-
-        SwitchToNearCode();
-        SetJumpTarget(noException);
-        gpr.Unlock(WA);
-      }
     }
 
     i += js.skipInstructions;

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -238,9 +238,6 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
   void ComputeCarry(bool Carry);
   void ComputeCarry();
 
-  typedef u32 (*Operation)(u32, u32);
-  void reg_imm(u32 d, u32 a, u32 value, Operation do_op,
-               void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg,
-                                         ArithOption),
-               bool Rc = false);
+  void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
+               void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc = false);
 };
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -75,25 +75,8 @@ void JitArm64::ComputeCarry()
   gpr.Unlock(WA);
 }
 
-// Following static functions are used in conjunction with reg_imm
-static u32 Or(u32 a, u32 b)
-{
-  return a | b;
-}
-
-static u32 And(u32 a, u32 b)
-{
-  return a & b;
-}
-
-static u32 Xor(u32 a, u32 b)
-{
-  return a ^ b;
-}
-
-void JitArm64::reg_imm(u32 d, u32 a, u32 value, Operation do_op,
-                       void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, ARM64Reg, ArithOption),
-                       bool Rc)
+void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
+                       void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc)
 {
   if (gpr.IsImm(a))
   {
@@ -105,8 +88,7 @@ void JitArm64::reg_imm(u32 d, u32 a, u32 value, Operation do_op,
   {
     gpr.BindToRegister(d, d == a);
     ARM64Reg WA = gpr.GetReg();
-    MOVI2R(WA, value);
-    (this->*op)(gpr.R(d), gpr.R(a), WA, ArithOption(WA, ST_LSL, 0));
+    (this->*op)(gpr.R(d), gpr.R(a), value, WA);
     gpr.Unlock(WA);
 
     if (Rc)
@@ -128,22 +110,23 @@ void JitArm64::arith_imm(UGeckoInstruction inst)
       // NOP
       return;
     }
-    reg_imm(a, s, inst.UIMM, Or, &ARM64XEmitter::ORR);
+    reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a | b; }, &ARM64XEmitter::ORRI2R);
     break;
   case 25:  // oris
-    reg_imm(a, s, inst.UIMM << 16, Or, &ARM64XEmitter::ORR);
+    reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a | b; }, &ARM64XEmitter::ORRI2R);
     break;
   case 28:  // andi
-    reg_imm(a, s, inst.UIMM, And, &ARM64XEmitter::AND, true);
+    reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a & b; }, &ARM64XEmitter::ANDI2R, true);
     break;
   case 29:  // andis
-    reg_imm(a, s, inst.UIMM << 16, And, &ARM64XEmitter::AND, true);
+    reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a & b; }, &ARM64XEmitter::ANDI2R,
+            true);
     break;
   case 26:  // xori
-    reg_imm(a, s, inst.UIMM, Xor, &ARM64XEmitter::EOR);
+    reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a ^ b; }, &ARM64XEmitter::EORI2R);
     break;
   case 27:  // xoris
-    reg_imm(a, s, inst.UIMM << 16, Xor, &ARM64XEmitter::EOR);
+    reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a ^ b; }, &ARM64XEmitter::EORI2R);
     break;
   }
 }
@@ -272,37 +255,37 @@ void JitArm64::boolX(UGeckoInstruction inst)
     gpr.BindToRegister(a, (a == s) || (a == b));
     if (inst.SUBOP10 == 28)  // andx
     {
-      AND(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      AND(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 476)  // nandx
     {
-      AND(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      AND(gpr.R(a), gpr.R(s), gpr.R(b));
       MVN(gpr.R(a), gpr.R(a));
     }
     else if (inst.SUBOP10 == 60)  // andcx
     {
-      BIC(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      BIC(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 444)  // orx
     {
-      ORR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      ORR(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 124)  // norx
     {
-      ORR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      ORR(gpr.R(a), gpr.R(s), gpr.R(b));
       MVN(gpr.R(a), gpr.R(a));
     }
     else if (inst.SUBOP10 == 412)  // orcx
     {
-      ORN(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      ORN(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 316)  // xorx
     {
-      EOR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      EOR(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 284)  // eqvx
     {
-      EON(gpr.R(a), gpr.R(b), gpr.R(s), ArithOption(gpr.R(a), ST_LSL, 0));
+      EON(gpr.R(a), gpr.R(b), gpr.R(s));
     }
     else
     {
@@ -418,7 +401,7 @@ void JitArm64::negx(UGeckoInstruction inst)
   else
   {
     gpr.BindToRegister(d, d == a);
-    SUB(gpr.R(d), WSP, gpr.R(a), ArithOption(gpr.R(a), ST_LSL, 0));
+    SUB(gpr.R(d), WSP, gpr.R(a));
     if (inst.Rc)
       ComputeRC(gpr.R(d), 0);
   }
@@ -692,8 +675,11 @@ void JitArm64::addic(UGeckoInstruction inst)
     else
     {
       ARM64Reg WA = gpr.GetReg();
-      MOVI2R(WA, imm);
-      ADDS(gpr.R(d), gpr.R(a), WA);
+      MOVI2R(WA, std::abs(simm));
+      if (simm < 0)
+        SUBS(gpr.R(d), gpr.R(a), WA);
+      else
+        ADDS(gpr.R(d), gpr.R(a), WA);
       gpr.Unlock(WA);
     }
 

diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp
@@ -9,19 +9,18 @@
 
 using namespace Arm64Gen;
 
-ARM64Reg src_reg = X0;
-ARM64Reg dst_reg = X1;
-ARM64Reg count_reg = W2;
-ARM64Reg skipped_reg = W17;
-ARM64Reg scratch1_reg = W16;
-ARM64Reg scratch2_reg = W15;
-ARM64Reg scratch3_reg = W14;
-ARM64Reg scratch4_reg = W13;
-ARM64Reg saved_count = W12;
-
-ARM64Reg stride_reg = X11;
-ARM64Reg arraybase_reg = X10;
-ARM64Reg scale_reg = X9;
+constexpr ARM64Reg src_reg = X0;
+constexpr ARM64Reg dst_reg = X1;
+constexpr ARM64Reg count_reg = W2;
+constexpr ARM64Reg skipped_reg = W17;
+constexpr ARM64Reg scratch1_reg = W16;
+constexpr ARM64Reg scratch2_reg = W15;
+constexpr ARM64Reg scratch3_reg = W14;
+constexpr ARM64Reg saved_count = W12;
+
+constexpr ARM64Reg stride_reg = X11;
+constexpr ARM64Reg arraybase_reg = X10;
+constexpr ARM64Reg scale_reg = X9;
 
 alignas(16) static const float scale_factors[] = {
     1.0 / (1ULL << 0),  1.0 / (1ULL << 1),  1.0 / (1ULL << 2),  1.0 / (1ULL << 3),