From 2580837c60b991ae34e7f7ae4b562e382857c4de Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Sat, 23 Mar 2024 13:08:35 +0100 Subject: [PATCH] JitArm64: Optimize divwux When the divisor is a constant value, we can emit more efficient code. For powers of two, we can use bit shifts. For other values, we can instead use a multiplication by magic constant method. - Example 1 - Division by 16 (power of two) Before: mov w24, #0x10 ; =16 udiv w27, w25, w24 After: lsr w27, w25, #4 - Example 2 - Division by 10 (fast) Before: mov w25, #0xa ; =10 udiv w27, w26, w25 After: mov w27, #0xcccd ; =52429 movk w27, #0xcccc, lsl #16 umull x27, w26, w27 lsr x27, x27, #35 - Example 3 - Division by 127 (slow) Before: mov w26, #0x7f ; =127 udiv w27, w27, w26 After: mov w26, #0x408 ; =1032 movk w26, #0x8102, lsl #16 umaddl x27, w27, w26, x26 lsr x27, x27, #38 --- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 95f75a3f2a27..b5ebfc8bf4e5 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1538,6 +1538,60 @@ void JitArm64::divwux(UGeckoInstruction inst) if (inst.Rc) ComputeRC0(gpr.GetImm(d)); } + else if (gpr.IsImm(b)) + { + const u32 divisor = gpr.GetImm(b); + + if (divisor == 0) + { + gpr.SetImmediate(d, 0); + if (inst.Rc) + ComputeRC0(0); + } + else + { + const bool allocate_reg = d == a; + gpr.BindToRegister(d, allocate_reg); + + ARM64Reg RD = gpr.R(d); + ARM64Reg RA = gpr.R(a); + + if (MathUtil::IsPow2(divisor)) + { + int shift = MathUtil::IntLog2(divisor); + if (shift) + LSR(RD, RA, shift); + else if (d != a) + MOV(RD, RA); + } + else + { + UnsignedMagic m = UnsignedDivisionConstants(divisor); + + ARM64Reg WI = allocate_reg ? gpr.GetReg() : RD; + ARM64Reg XD = EncodeRegTo64(RD); + + MOVI2R(WI, m.multiplier); + + if (m.fast) + { + UMULL(XD, RA, WI); + } + else + { + UMADDL(XD, RA, WI, EncodeRegTo64(WI)); + } + + LSR(XD, XD, 32 + m.shift); + + if (allocate_reg) + gpr.Unlock(WI); + } + + if (inst.Rc) + ComputeRC0(gpr.R(d)); + } + } else { gpr.BindToRegister(d, d == a || d == b);