Skip to content

Commit

Permalink
Merge pull request #3956 from mmastrac/mov_sum
Browse files Browse the repository at this point in the history
Add MOV optimizations and MOV_sum
  • Loading branch information
degasus committed Jun 27, 2016
2 parents adcef04 + 1e08ad0 commit 93a5efa
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 88 deletions.
77 changes: 77 additions & 0 deletions Source/Core/Common/x64Emitter.cpp
Expand Up @@ -1540,6 +1540,13 @@ void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2)
}
void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2)
{
// Shortcut to zero a register
if (a2.IsZero() && a1.IsSimpleReg() && !flags_locked)
{
XOR(bits, a1, a1);
return;
}

if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
ERROR_LOG(DYNA_REC, "Redundant MOV @ %p - bug in JIT?", code);
WriteNormalOp(bits, nrmMOV, a1, a2);
Expand Down Expand Up @@ -1572,6 +1579,76 @@ void XEmitter::CMP_or_TEST(int bits, const OpArg& a1, const OpArg& a2)
}
}

void XEmitter::MOV_sum(int bits, X64Reg dest, const OpArg& a1, const OpArg& a2)
{
// This stomps on flags, so ensure they aren't locked
_dbg_assert_(DYNA_REC, !flags_locked);

// Zero shortcuts (note that this can generate no code in the case where a1 == dest && a2 == zero
// or a2 == dest && a1 == zero)
if (a1.IsZero())
{
if (!a2.IsSimpleReg() || a2.GetSimpleReg() != dest)
{
MOV(bits, R(dest), a2);
}
return;
}
if (a2.IsZero())
{
if (!a1.IsSimpleReg() || a1.GetSimpleReg() != dest)
{
MOV(bits, R(dest), a1);
}
return;
}

// If dest == a1 or dest == a2 we can simplify this
if (a1.IsSimpleReg() && a1.GetSimpleReg() == dest)
{
ADD(bits, R(dest), a2);
return;
}

if (a2.IsSimpleReg() && a2.GetSimpleReg() == dest)
{
ADD(bits, R(dest), a1);
return;
}

// TODO: 32-bit optimizations may apply to other bit sizes (confirm)
if (bits == 32)
{
if (a1.IsImm() && a2.IsImm())
{
MOV(32, R(dest), Imm32(a1.Imm32() + a2.Imm32()));
return;
}

if (a1.IsSimpleReg() && a2.IsSimpleReg())
{
LEA(32, dest, MRegSum(a1.GetSimpleReg(), a2.GetSimpleReg()));
return;
}

if (a1.IsSimpleReg() && a2.IsImm())
{
LEA(32, dest, MDisp(a1.GetSimpleReg(), a2.Imm32()));
return;
}

if (a1.IsImm() && a2.IsSimpleReg())
{
LEA(32, dest, MDisp(a2.GetSimpleReg(), a1.Imm32()));
return;
}
}

// Fallback
MOV(bits, R(dest), a1);
ADD(bits, R(dest), a2);
}

void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)
{
CheckFlags();
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/Common/x64Emitter.h
Expand Up @@ -310,6 +310,7 @@ struct OpArg
}
bool IsSimpleReg() const { return scale == SCALE_NONE; }
bool IsSimpleReg(X64Reg reg) const { return IsSimpleReg() && GetSimpleReg() == reg; }
// True if this operand is an immediate whose stored value (offset) is zero.
bool IsZero() const { return IsImm() && offset == 0; }
int GetImmBits() const
{
switch (scale)
Expand Down Expand Up @@ -639,6 +640,7 @@ class XEmitter
void TEST(int bits, const OpArg& a1, const OpArg& a2);

void CMP_or_TEST(int bits, const OpArg& a1, const OpArg& a2);
void MOV_sum(int bits, X64Reg dest, const OpArg& a1, const OpArg& a2);

// Are these useful at all? Consider removing.
void XCHG(int bits, const OpArg& a1, const OpArg& a2);
Expand Down
43 changes: 4 additions & 39 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
Expand Up @@ -236,27 +236,9 @@ void Jit64::lXXx(UGeckoInstruction inst)
opAddress = R(RSCRATCH2);
storeAddress = true;
if (use_constant_offset)
{
if (gpr.R(a).IsSimpleReg() && offset != 0)
{
LEA(32, RSCRATCH2, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, opAddress, gpr.R(a));
if (offset != 0)
ADD(32, opAddress, Imm32((u32)offset));
}
}
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{
LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
}
MOV_sum(32, RSCRATCH2, gpr.R(a), Imm32((u32)offset));
else
{
MOV(32, opAddress, gpr.R(a));
ADD(32, opAddress, gpr.R(b));
}
MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
}
}
}
Expand Down Expand Up @@ -307,16 +289,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
X64Reg tmp = gpr.GetFreeXReg();
gpr.FlushLockX(tmp);

if (inst.RA && gpr.R(inst.RA).IsSimpleReg() && gpr.R(inst.RB).IsSimpleReg())
{
LEA(32, addr, MRegSum(gpr.RX(inst.RA), gpr.RX(inst.RB)));
}
else
{
MOV(32, R(addr), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(addr), gpr.R(inst.RA));
}
MOV_sum(32, addr, inst.RA ? gpr.R(inst.RA) : Imm32(0), gpr.R(inst.RB));

// Check whether a JIT cache line needs to be invalidated.
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
Expand Down Expand Up @@ -521,15 +494,7 @@ void Jit64::stXx(UGeckoInstruction inst)
if (update)
gpr.BindToRegister(a, true, true);

if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{
LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
}
else
{
MOV(32, R(RSCRATCH2), gpr.R(a));
ADD(32, R(RSCRATCH2), gpr.R(b));
}
MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));

int accessSize;
switch (inst.SUBOP10 & ~32)
Expand Down
22 changes: 3 additions & 19 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
Expand Up @@ -50,14 +50,7 @@ void Jit64::lfXXX(UGeckoInstruction inst)
else
{
addr = R(RSCRATCH2);
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
else
{
MOV(32, addr, gpr.R(b));
if (a)
ADD(32, addr, gpr.R(a));
}
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
}
}
else
Expand Down Expand Up @@ -162,14 +155,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
gpr.BindToRegister(a, true, true);
if (indexed)
{
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
else
{
MOV(32, R(RSCRATCH2), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH2), gpr.R(a));
}
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
}
else
{
Expand Down Expand Up @@ -209,9 +195,7 @@ void Jit64::stfiwx(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;

MOV(32, R(RSCRATCH2), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH2), gpr.R(a));
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));

if (fpr.R(s).IsSimpleReg())
MOVD_xmm(R(RSCRATCH), fpr.RX(s));
Expand Down
36 changes: 6 additions & 30 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
Expand Up @@ -43,21 +43,9 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
gpr.FlushLockX(RSCRATCH_EXTRA);
if (update)
gpr.BindToRegister(a, true, true);
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{
if (indexed)
LEA(32, RSCRATCH_EXTRA, MRegSum(gpr.RX(a), gpr.RX(b)));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
}

MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));

// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
Expand Down Expand Up @@ -143,21 +131,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true);
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{
if (indexed)
LEA(32, RSCRATCH_EXTRA, MRegSum(gpr.RX(a), gpr.RX(b)));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
}

MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));

// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
Expand Down

0 comments on commit 93a5efa

Please sign in to comment.