Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #11243 from Sintendo/arm64mul
JitArm64: Optimize multiplication
  • Loading branch information
AdmiralCurtiss committed Nov 15, 2022
2 parents c9e7480 + 274e34d commit d7593dd
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 3 deletions.
1 change: 1 addition & 0 deletions Source/Core/Core/PowerPC/JitArm64/Jit.h
Expand Up @@ -332,6 +332,7 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, u64,
Arm64Gen::ARM64Reg),
bool Rc = false);
bool MultiplyImmediate(u32 imm, int a, int d, bool rc);

void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg);
void Force25BitPrecision(Arm64Gen::ARM64Reg output, Arm64Gen::ARM64Reg input);
Expand Down
90 changes: 87 additions & 3 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
Expand Up @@ -7,6 +7,7 @@
#include "Common/Assert.h"
#include "Common/BitUtils.h"
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"

#include "Core/Core.h"
#include "Core/CoreTiming.h"
Expand Down Expand Up @@ -883,6 +884,75 @@ void JitArm64::addic(UGeckoInstruction inst)
}
}

// Tries to emit a multiplication of guest register `a` by the constant `imm`, writing the
// result to guest register `d`, using a cheaper sequence than a real multiply.
//
// Recognised constants (negative values are passed as their 32-bit bit pattern):
//   0, 1, 2^n, 2^n + 1, -(2^n), 1 - 2^n
//
// If `rc` is true, ComputeRC0 is called on the result.
//
// Returns true when one of the special cases was handled here. Returns false when the
// constant matched none of them; no register-cache or emitter call is made on that path,
// so the caller is expected to fall back to its generic multiplication.
bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc)
{
  if (imm == 0)
  {
    // a * 0: the result is a constant, so d simply becomes the immediate 0.
    gpr.SetImmediate(d, 0);
    if (rc)
      ComputeRC0(gpr.GetImm(d));
    return true;
  }

  // Unsigned (wrapping) forms of -imm and 1 - imm, used to detect the negative cases.
  const u32 negated_imm = ~imm + 1;
  const u32 one_minus_imm = ~imm + 2;

  if (imm == 1)
  {
    // a * 1: a plain copy, and nothing at all when source and destination coincide.
    if (d != a)
    {
      gpr.BindToRegister(d, false);
      MOV(gpr.R(d), gpr.R(a));
    }
  }
  else if (MathUtil::IsPow2(imm))
  {
    // a * 2^n == a << n
    const int shift_amount = IntLog2(imm);

    gpr.BindToRegister(d, d == a);
    LSL(gpr.R(d), gpr.R(a), shift_amount);
  }
  else if (MathUtil::IsPow2(imm - 1))
  {
    // a * (2^n + 1) == a + (a << n)
    const int shift_amount = IntLog2(imm - 1);

    gpr.BindToRegister(d, d == a);
    ADD(gpr.R(d), gpr.R(a), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift_amount));
  }
  else if (MathUtil::IsPow2(negated_imm))
  {
    // a * -(2^n) == -(a << n)
    const int shift_amount = IntLog2(negated_imm);

    gpr.BindToRegister(d, d == a);
    NEG(gpr.R(d), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift_amount));
  }
  else if (MathUtil::IsPow2(one_minus_imm))
  {
    // a * (1 - 2^n) == a - (a << n)
    const int shift_amount = IntLog2(one_minus_imm);

    gpr.BindToRegister(d, d == a);
    SUB(gpr.R(d), gpr.R(a), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift_amount));
  }
  else
  {
    // Not one of the cheap special cases.
    return false;
  }

  // Every register-producing case above finishes with the same optional RC0 update.
  if (rc)
    ComputeRC0(gpr.R(d));

  return true;
}

void JitArm64::mulli(UGeckoInstruction inst)
{
INSTRUCTION_START
Expand All @@ -895,13 +965,22 @@ void JitArm64::mulli(UGeckoInstruction inst)
s32 i = (s32)gpr.GetImm(a);
gpr.SetImmediate(d, i * inst.SIMM_16);
}
else if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false))
{
// Code is generated inside MultiplyImmediate, nothing to be done here.
}
else
{
gpr.BindToRegister(d, d == a);
ARM64Reg WA = gpr.GetReg();
const bool allocate_reg = d == a;
gpr.BindToRegister(d, allocate_reg);

// Reuse d to hold the immediate if possible, allocate a register otherwise.
ARM64Reg WA = allocate_reg ? gpr.GetReg() : gpr.R(d);

MOVI2R(WA, (u32)(s32)inst.SIMM_16);
MUL(gpr.R(d), gpr.R(a), WA);
gpr.Unlock(WA);
if (allocate_reg)
gpr.Unlock(WA);
}
}

Expand All @@ -920,6 +999,11 @@ void JitArm64::mullwx(UGeckoInstruction inst)
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) ||
(gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc)))
{
// Code is generated inside MultiplyImmediate, nothing to be done here.
}
else
{
gpr.BindToRegister(d, d == a || d == b);
Expand Down

0 comments on commit d7593dd

Please sign in to comment.