This file was deleted.

This file was deleted.

@@ -8,7 +8,7 @@
#include "Core/CoreTiming.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
@@ -160,9 +160,13 @@ void Jit64::bcx(UGeckoInstruction inst)
else
destination = js.compilerPC + SignExt16(inst.BD << 2);

gpr.Flush(RegCache::FlushMode::MaintainState);
fpr.Flush(RegCache::FlushMode::MaintainState);
WriteExit(destination, inst.LK, js.compilerPC + 4);
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
WriteExit(destination, inst.LK, js.compilerPC + 4);
}

if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget(pConditionDontBranch);
@@ -215,10 +219,14 @@ void Jit64::bcctrx(UGeckoInstruction inst)
if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;

gpr.Flush(RegCache::FlushMode::MaintainState);
fpr.Flush(RegCache::FlushMode::MaintainState);
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
// Would really like to continue the block here, but it ends. TODO.
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
// Would really like to continue the block here, but it ends. TODO.
}
SetJumpTarget(b);

if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
@@ -269,9 +277,13 @@ void Jit64::bclrx(UGeckoInstruction inst)
if (inst.LK)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));

gpr.Flush(RegCache::FlushMode::MaintainState);
fpr.Flush(RegCache::FlushMode::MaintainState);
WriteBLRExit();
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
WriteBLRExit();
}

if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget(pConditionDontBranch);

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

@@ -6,7 +6,7 @@
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"

using namespace Gen;
@@ -30,25 +30,27 @@ void Jit64::lfXXX(UGeckoInstruction inst)

FALLBACK_IF(!indexed && !a);

gpr.BindToRegister(a, true, update);

s32 offset = 0;
OpArg addr = gpr.R(a);
RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
RegCache::Realize(addr);

if (update && jo.memcheck)
{
addr = R(RSCRATCH2);
MOV(32, addr, gpr.R(a));
MOV(32, R(RSCRATCH2), addr);
addr = RCOpArg::R(RSCRATCH2);
}
if (indexed)
{
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RegCache::Realize(Rb);
if (update)
{
ADD(32, addr, gpr.R(b));
ADD(32, addr, Rb);
}
else
{
addr = R(RSCRATCH2);
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
MOV_sum(32, RSCRATCH2, a ? addr.Location() : Imm32(0), Rb);
addr = RCOpArg::R(RSCRATCH2);
}
}
else
@@ -59,31 +61,29 @@ void Jit64::lfXXX(UGeckoInstruction inst)
offset = (s16)inst.SIMM_16;
}

fpr.Lock(d);
if (jo.memcheck && single)
{
fpr.StoreFromRegister(d);
js.revertFprLoad = d;
}
fpr.BindToRegister(d, !single);
RCMode Rd_mode = !single ? RCMode::ReadWrite : RCMode::Write;
RCX64Reg Rd = jo.memcheck && single ? fpr.RevertableBind(d, Rd_mode) : fpr.Bind(d, Rd_mode);
RegCache::Realize(Rd);
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update && jo.memcheck)
registersInUse[RSCRATCH2] = true;
SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false);

if (single)
{
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
ConvertSingleToDouble(Rd, RSCRATCH, true);
}
else
{
MOVQ_xmm(XMM0, R(RSCRATCH));
MOVSD(fpr.RX(d), R(XMM0));
MOVSD(Rd, R(XMM0));
}
if (update && jo.memcheck)
MOV(32, gpr.R(a), addr);
fpr.UnlockAll();
gpr.UnlockAll();
{
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Ra);
MOV(32, Ra, addr);
}
}

void Jit64::stfXXX(UGeckoInstruction inst)
@@ -107,26 +107,31 @@ void Jit64::stfXXX(UGeckoInstruction inst)
{
if (js.op->fprIsStoreSafe[s])
{
CVTSD2SS(XMM0, fpr.R(s));
RCOpArg Rs = fpr.Use(s, RCMode::Read);
RegCache::Realize(Rs);
CVTSD2SS(XMM0, Rs);
}
else
{
fpr.BindToRegister(s, true, false);
ConvertDoubleToSingle(XMM0, fpr.RX(s));
RCX64Reg Rs = fpr.Bind(s, RCMode::Read);
RegCache::Realize(Rs);
ConvertDoubleToSingle(XMM0, Rs);
}
MOVD_xmm(R(RSCRATCH), XMM0);
}
else
{
if (fpr.R(s).IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
RCOpArg Rs = fpr.Use(s, RCMode::Read);
RegCache::Realize(Rs);
if (Rs.IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), Rs.GetSimpleReg());
else
MOV(64, R(RSCRATCH), fpr.R(s));
MOV(64, R(RSCRATCH), Rs);
}

if (!indexed && (!a || gpr.R(a).IsImm()))
if (!indexed && (!a || gpr.IsImm(a)))
{
u32 addr = (a ? gpr.R(a).Imm32() : 0) + imm;
u32 addr = (a ? gpr.Imm32(a) : 0) + imm;
bool exception =
WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse());

@@ -138,33 +143,34 @@ void Jit64::stfXXX(UGeckoInstruction inst)
}
else
{
gpr.KillImmediate(a, true, true);
RCOpArg Ra = gpr.UseNoImm(a, RCMode::ReadWrite);
RegCache::Realize(Ra);
MemoryExceptionCheck();
ADD(32, gpr.R(a), Imm32((u32)imm));
ADD(32, Ra, Imm32((u32)imm));
}
}
fpr.UnlockAll();
gpr.UnlockAll();
return;
}

s32 offset = 0;
if (update)
gpr.BindToRegister(a, true, true);
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
RegCache::Realize(Ra);
if (indexed)
{
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RegCache::Realize(Rb);
MOV_sum(32, RSCRATCH2, a ? Ra.Location() : Imm32(0), Rb);
}
else
{
if (update)
{
LEA(32, RSCRATCH2, MDisp(gpr.RX(a), imm));
MOV_sum(32, RSCRATCH2, Ra, Imm32(imm));
}
else
{
offset = imm;
MOV(32, R(RSCRATCH2), gpr.R(a));
MOV(32, R(RSCRATCH2), Ra);
}
}

@@ -176,11 +182,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);

if (update)
MOV(32, gpr.R(a), R(RSCRATCH2));

fpr.UnlockAll();
gpr.UnlockAll();
gpr.UnlockAllX();
MOV(32, Ra, R(RSCRATCH2));
}

// This one is a little bit weird; it stores the low 32 bits of a double without converting it
@@ -193,12 +195,16 @@ void Jit64::stfiwx(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;

MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RCOpArg Rs = fpr.Use(s, RCMode::Read);
RegCache::Realize(Ra, Rb, Rs);

MOV_sum(32, RSCRATCH2, Ra, Rb);

if (fpr.R(s).IsSimpleReg())
MOVD_xmm(R(RSCRATCH), fpr.RX(s));
if (Rs.IsSimpleReg())
MOVD_xmm(R(RSCRATCH), Rs.GetSimpleReg());
else
MOV(32, R(RSCRATCH), fpr.R(s));
MOV(32, R(RSCRATCH), Rs);
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse());
gpr.UnlockAllX();
}
@@ -9,7 +9,7 @@

#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/PowerPC.h"
@@ -40,21 +40,22 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
bool gqrIsConstant = it != js.constantGqr.end();
u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0;

gpr.Lock(a, b);
gpr.FlushLockX(RSCRATCH_EXTRA);
if (update)
gpr.BindToRegister(a, true, true);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
RCOpArg Rs = fpr.Use(s, RCMode::Read);
RegCache::Realize(scratch_guard, Ra, Rb, Rs);

MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb);

// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
MOV(32, Ra, R(RSCRATCH_EXTRA));

if (w)
CVTSD2SS(XMM0, fpr.R(s)); // one
CVTSD2SS(XMM0, Rs); // one
else
CVTPD2PS(XMM0, fpr.R(s)); // pair
CVTPD2PS(XMM0, Rs); // pair

if (gqrIsConstant)
{
@@ -104,13 +105,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)

if (update && jo.memcheck)
{
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset));
ADD(32, Ra, Rb);
}
gpr.UnlockAll();
gpr.UnlockAllX();
}

void Jit64::psq_lXX(UGeckoInstruction inst)
@@ -135,17 +131,17 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
bool gqrIsConstant = it != js.constantGqr.end();
u32 gqrValue = gqrIsConstant ? it->second >> 16 : 0;

gpr.Lock(a, b);

gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
RCX64Reg Rs = fpr.Bind(s, RCMode::Write);
RegCache::Realize(scratch_guard, Ra, Rb, Rs);

MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
MOV_sum(32, RSCRATCH_EXTRA, Ra, Rb);

// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
MOV(32, Ra, R(RSCRATCH_EXTRA));

if (gqrIsConstant)
{
@@ -169,15 +165,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
CALLptr(MatR(RSCRATCH));
}

CVTPS2PD(fpr.RX(s), R(XMM0));
CVTPS2PD(Rs, R(XMM0));
if (update && jo.memcheck)
{
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset));
ADD(32, Ra, Rb);
}

gpr.UnlockAll();
gpr.UnlockAllX();
}
@@ -7,7 +7,7 @@
#include "Common/MsgHandler.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"

using namespace Gen;

@@ -22,8 +22,10 @@ void Jit64::ps_mr(UGeckoInstruction inst)
if (d == b)
return;

fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), fpr.R(b));
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Rb, Rd);
MOVAPD(Rd, Rb);
}

void Jit64::ps_sum(UGeckoInstruction inst)
@@ -36,43 +38,46 @@ void Jit64::ps_sum(UGeckoInstruction inst)
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
fpr.Lock(a, b, c, d);
OpArg op_a = fpr.R(a);
fpr.BindToRegister(d, d == b || d == c);

RCOpArg Ra = fpr.Use(a, RCMode::Read);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCOpArg Rc = fpr.Use(c, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rb, Rc, Rd);

X64Reg tmp = XMM1;
MOVDDUP(tmp, op_a); // {a.ps0, a.ps0}
ADDPD(tmp, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
MOVDDUP(tmp, Ra); // {a.ps0, a.ps0}
ADDPD(tmp, Rb); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
switch (inst.SUBOP5)
{
case 10: // ps_sum0: {a.ps0 + b.ps1, c.ps1}
UNPCKHPD(tmp, fpr.R(c));
UNPCKHPD(tmp, Rc);
break;
case 11: // ps_sum1: {c.ps0, a.ps0 + b.ps1}
if (fpr.R(c).IsSimpleReg())
if (Rc.IsSimpleReg())
{
if (cpu_info.bSSE4_1)
{
BLENDPD(tmp, fpr.R(c), 1);
BLENDPD(tmp, Rc, 1);
}
else
{
MOVAPD(XMM0, fpr.R(c));
MOVAPD(XMM0, Rc);
SHUFPD(XMM0, R(tmp), 2);
tmp = XMM0;
}
}
else
{
MOVLPD(tmp, fpr.R(c));
MOVLPD(tmp, Rc);
}
break;
default:
PanicAlert("ps_sum WTF!!!");
}
HandleNaNs(inst, fpr.RX(d), tmp, tmp == XMM1 ? XMM0 : XMM1);
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
HandleNaNs(inst, Rd, tmp, tmp == XMM1 ? XMM0 : XMM1);
ForceSinglePrecision(Rd, Rd);
SetFPRFIfNeeded(Rd);
}

void Jit64::ps_muls(UGeckoInstruction inst)
@@ -85,26 +90,29 @@ void Jit64::ps_muls(UGeckoInstruction inst)
int a = inst.FA;
int c = inst.FC;
bool round_input = !js.op->fprIsSingle[c];
fpr.Lock(a, c, d);

RCOpArg Ra = fpr.Use(a, RCMode::Read);
RCOpArg Rc = fpr.Use(c, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rc, Rd);

switch (inst.SUBOP5)
{
case 12: // ps_muls0
MOVDDUP(XMM1, fpr.R(c));
MOVDDUP(XMM1, Rc);
break;
case 13: // ps_muls1
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, fpr.R(c), fpr.R(c), 3);
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM1, Rc, Rc, 3);
break;
default:
PanicAlert("ps_muls WTF!!!");
}
if (round_input)
Force25BitPrecision(XMM1, R(XMM1), XMM0);
MULPD(XMM1, fpr.R(a));
fpr.BindToRegister(d, false);
HandleNaNs(inst, fpr.RX(d), XMM1);
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
MULPD(XMM1, Ra);
HandleNaNs(inst, Rd, XMM1);
ForceSinglePrecision(Rd, Rd);
SetFPRFIfNeeded(Rd);
}

void Jit64::ps_mergeXX(UGeckoInstruction inst)
@@ -116,27 +124,29 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst)
int d = inst.FD;
int a = inst.FA;
int b = inst.FB;
fpr.Lock(a, b, d);
fpr.BindToRegister(d, d == a || d == b);

RCOpArg Ra = fpr.Use(a, RCMode::Read);
RCOpArg Rb = fpr.Use(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rb, Rd);

switch (inst.SUBOP10)
{
case 528:
avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, fpr.RX(d), fpr.R(a), fpr.R(b));
avx_op(&XEmitter::VUNPCKLPD, &XEmitter::UNPCKLPD, Rd, Ra, Rb);
break; // 00
case 560:
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 2);
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 2);
break; // 01
case 592:
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, fpr.RX(d), fpr.R(a), fpr.R(b), 1);
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rd, Ra, Rb, 1);
break; // 10
case 624:
avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, fpr.RX(d), fpr.R(a), fpr.R(b));
avx_op(&XEmitter::VUNPCKHPD, &XEmitter::UNPCKHPD, Rd, Ra, Rb);
break; // 11
default:
ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
}
fpr.UnlockAll();
}

void Jit64::ps_rsqrte(UGeckoInstruction inst)
@@ -147,23 +157,21 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
int b = inst.FB;
int d = inst.FD;

gpr.FlushLockX(RSCRATCH_EXTRA);
fpr.Lock(b, d);
fpr.BindToRegister(b, true, false);
fpr.BindToRegister(d, false);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(scratch_guard, Rb, Rd);

MOVSD(XMM0, fpr.R(b));
MOVSD(XMM0, Rb);
CALL(asm_routines.frsqrte);
MOVSD(fpr.R(d), XMM0);
MOVSD(Rd, XMM0);

MOVHLPS(XMM0, fpr.RX(b));
MOVHLPS(XMM0, Rb);
CALL(asm_routines.frsqrte);
MOVLHPS(fpr.RX(d), XMM0);
MOVLHPS(Rd, XMM0);

ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
ForceSinglePrecision(Rd, Rd);
SetFPRFIfNeeded(Rd);
}

void Jit64::ps_res(UGeckoInstruction inst)
@@ -174,23 +182,21 @@ void Jit64::ps_res(UGeckoInstruction inst)
int b = inst.FB;
int d = inst.FD;

gpr.FlushLockX(RSCRATCH_EXTRA);
fpr.Lock(b, d);
fpr.BindToRegister(b, true, false);
fpr.BindToRegister(d, false);
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCX64Reg Rb = fpr.Bind(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(scratch_guard, Rb, Rd);

MOVSD(XMM0, fpr.R(b));
MOVSD(XMM0, Rb);
CALL(asm_routines.fres);
MOVSD(fpr.R(d), XMM0);
MOVSD(Rd, XMM0);

MOVHLPS(XMM0, fpr.RX(b));
MOVHLPS(XMM0, Rb);
CALL(asm_routines.fres);
MOVLHPS(fpr.RX(d), XMM0);
MOVLHPS(Rd, XMM0);

ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
ForceSinglePrecision(Rd, Rd);
SetFPRFIfNeeded(Rd);
}

void Jit64::ps_cmpXX(UGeckoInstruction inst)
@@ -9,7 +9,7 @@
#include "Core/CoreTiming.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PowerPC.h"

@@ -219,26 +219,32 @@ void Jit64::mtspr(UGeckoInstruction inst)
break;

case SPR_XER:
gpr.Lock(d);
gpr.BindToRegister(d, true, false);
MOV(32, R(RSCRATCH), gpr.R(d));
{
RCX64Reg Rd = gpr.Bind(d, RCMode::Read);
RegCache::Realize(Rd);

MOV(32, R(RSCRATCH), Rd);
AND(32, R(RSCRATCH), Imm32(0xff7f));
MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH));

MOV(32, R(RSCRATCH), gpr.R(d));
MOV(32, R(RSCRATCH), Rd);
SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(8, R(RSCRATCH), Imm8(1));
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH));

MOV(32, R(RSCRATCH), gpr.R(d));
MOV(32, R(RSCRATCH), Rd);
SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
gpr.UnlockAll();

return;
}

case SPR_HID0:
{
MOV(32, R(RSCRATCH), gpr.R(d));
RCOpArg Rd = gpr.Use(d, RCMode::Read);
RegCache::Realize(Rd);

MOV(32, R(RSCRATCH), Rd);
BTR(32, R(RSCRATCH), Imm8(31 - 20)); // ICFI
MOV(32, PPCSTATE(spr[iIndex]), R(RSCRATCH));
FixupBranch dont_reset_icache = J_CC(CC_NC);
@@ -255,13 +261,9 @@ void Jit64::mtspr(UGeckoInstruction inst)
}

// OK, this is easy.
if (!gpr.R(d).IsImm())
{
gpr.Lock(d);
gpr.BindToRegister(d, true, false);
}
MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d));
gpr.UnlockAll();
RCOpArg Rd = gpr.BindOrImm(d, RCMode::Read);
RegCache::Realize(Rd);
MOV(32, PPCSTATE(spr[iIndex]), Rd);
}

void Jit64::mfspr(UGeckoInstruction inst)
@@ -281,22 +283,23 @@ void Jit64::mfspr(UGeckoInstruction inst)
// redundant for the JIT.
// no register choice

gpr.FlushLockX(RDX, RAX);
gpr.FlushLockX(RCX);
RCX64Reg rdx = gpr.Scratch(RDX);
RCX64Reg rax = gpr.Scratch(RAX);
RCX64Reg rcx = gpr.Scratch(RCX);

MOV(64, R(RCX), ImmPtr(&CoreTiming::g));
MOV(64, rcx, ImmPtr(&CoreTiming::g));

// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant.
// Scale downcount by the CPU overclocking factor.
CVTSI2SS(XMM0, PPCSTATE(downcount));
MULSS(XMM0, MDisp(RCX, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor
MOV(32, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, slice_length)));
SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength -
// Scaled_downcount)
ADD(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, global_timer)));
SUB(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
MULSS(XMM0, MDisp(rcx, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
CVTSS2SI(rdx, R(XMM0)); // RDX is downcount scaled by the overclocking factor
MOV(32, rax, MDisp(rcx, offsetof(CoreTiming::Globals, slice_length)));
SUB(64, rax, rdx); // cycles since the last CoreTiming::Advance() event is (slicelength -
// Scaled_downcount)
ADD(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, global_timer)));
SUB(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
// It might seem convenient to correct the timer for the block position here for even more
// accurate
// timing, but as of currently, this can break games. If we end up reading a time *after* the
@@ -307,15 +310,15 @@ void Jit64::mfspr(UGeckoInstruction inst)
// Revolution,
// which won't get past the loading screen.
// if (js.downcountAmount)
// ADD(64, R(RAX), Imm32(js.downcountAmount));
// ADD(64, rax, Imm32(js.downcountAmount));

// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL));
MUL(64, R(RDX));
MOV(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_value)));
SHR(64, R(RDX), Imm8(3));
ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
MOV(64, rdx, Imm64(0xAAAAAAAAAAAAAAABULL));
MUL(64, rdx);
MOV(64, rax, MDisp(rcx, offsetof(CoreTiming::Globals, fake_TB_start_value)));
SHR(64, rdx, Imm8(3));
ADD(64, rax, rdx);
MOV(64, PPCSTATE(spr[SPR_TL]), rax);

if (CanMergeNextInstructions(1))
{
@@ -330,40 +333,42 @@ void Jit64::mfspr(UGeckoInstruction inst)
{
js.downcountAmount++;
js.skipInstructions = 1;
gpr.Lock(d, n);
gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RCX64Reg Rn = gpr.Bind(n, RCMode::Write);
RegCache::Realize(Rd, Rn);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(RAX));
MOV(32, Rd, rax);
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32));
MOV(32, Rn, rax);
SHR(64, rax, Imm8(32));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(RAX));
MOV(32, Rd, rax);
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX));
MOV(32, Rn, rax);
break;
}
}
gpr.Lock(d);
gpr.BindToRegister(d, false);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX));
SHR(64, rax, Imm8(32));
MOV(32, Rd, rax);
break;
}
case SPR_XER:
gpr.Lock(d);
gpr.BindToRegister(d, false);
MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl));
{
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
MOVZX(32, 16, Rd, PPCSTATE(xer_stringctrl));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, gpr.R(d), R(RSCRATCH));
OR(32, Rd, R(RSCRATCH));

MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
OR(32, gpr.R(d), R(RSCRATCH));
OR(32, Rd, R(RSCRATCH));
break;
}
case SPR_WPAR:
case SPR_DEC:
case SPR_PMC1:
@@ -372,26 +377,25 @@ void Jit64::mfspr(UGeckoInstruction inst)
case SPR_PMC4:
FALLBACK_IF(true);
default:
gpr.Lock(d);
gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
{
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
MOV(32, Rd, PPCSTATE(spr[iIndex]));
break;
}
gpr.UnlockAllX();
gpr.UnlockAll();
}
}

void Jit64::mtmsr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
if (!gpr.R(inst.RS).IsImm())

{
gpr.Lock(inst.RS);
gpr.BindToRegister(inst.RS, true, false);
RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs);
}
MOV(32, PPCSTATE(msr), gpr.R(inst.RS));
gpr.UnlockAll();
gpr.Flush();
fpr.Flush();

@@ -430,10 +434,9 @@ void Jit64::mfmsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// Privileged?
gpr.Lock(inst.RD);
gpr.BindToRegister(inst.RD, false, true);
MOV(32, gpr.R(inst.RD), PPCSTATE(msr));
gpr.UnlockAll();
RCX64Reg Rd = gpr.Bind(inst.RD, RCMode::Write);
RegCache::Realize(Rd);
MOV(32, Rd, PPCSTATE(msr));
}

void Jit64::mftb(UGeckoInstruction inst)
@@ -448,13 +451,13 @@ void Jit64::mfcr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
int d = inst.RD;
gpr.FlushLockX(RSCRATCH_EXTRA);

RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
CALL(asm_routines.mfcr);
gpr.Lock(d);
gpr.BindToRegister(d, false, true);
MOV(32, gpr.R(d), R(RSCRATCH));
gpr.UnlockAll();
gpr.UnlockAllX();

RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
MOV(32, Rd, R(RSCRATCH));
}

void Jit64::mtcrf(UGeckoInstruction inst)
@@ -466,13 +469,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
u32 crm = inst.CRM;
if (crm != 0)
{
if (gpr.R(inst.RS).IsImm())
if (gpr.IsImm(inst.RS))
{
for (int i = 0; i < 8; i++)
{
if ((crm & (0x80 >> i)) != 0)
{
u8 newcr = (gpr.R(inst.RS).Imm32() >> (28 - (i * 4))) & 0xF;
u8 newcr = (gpr.Imm32(inst.RS) >> (28 - (i * 4))) & 0xF;
u64 newcrval = PowerPC::PPCCRToInternal(newcr);
if ((s64)newcrval == (s32)newcrval)
{
@@ -489,13 +492,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
else
{
MOV(64, R(RSCRATCH2), ImmPtr(PowerPC::m_crTable.data()));
gpr.Lock(inst.RS);
gpr.BindToRegister(inst.RS, true, false);
RCX64Reg Rs = gpr.Bind(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
for (int i = 0; i < 8; i++)
{
if ((crm & (0x80 >> i)) != 0)
{
MOV(32, R(RSCRATCH), gpr.R(inst.RS));
MOV(32, R(RSCRATCH), Rs);
if (i != 7)
SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
if (i != 0)
@@ -504,7 +507,6 @@ void Jit64::mtcrf(UGeckoInstruction inst)
MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
}
}
gpr.UnlockAll();
}
}
}
@@ -653,11 +655,12 @@ void Jit64::mffsx(UGeckoInstruction inst)
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));

int d = inst.FD;
fpr.BindToRegister(d, false, true);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd);
MOV(64, R(RSCRATCH2), Imm64(0xFFF8000000000000));
OR(64, R(RSCRATCH), R(RSCRATCH2));
MOVQ_xmm(XMM0, R(RSCRATCH));
MOVSD(fpr.RX(d), R(XMM0));
MOVSD(Rd, R(XMM0));
}

// MXCSR = s_fpscr_to_mxcsr[FPSCR & 7]
@@ -751,10 +754,14 @@ void Jit64::mtfsfx(UGeckoInstruction inst)
}

int b = inst.FB;
if (fpr.R(b).IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), fpr.RX(b));

RCOpArg Rb = fpr.Use(b, RCMode::Read);
RegCache::Realize(Rb);

if (Rb.IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), Rb.GetSimpleReg());
else
MOV(32, R(RSCRATCH), fpr.R(b));
MOV(32, R(RSCRATCH), Rb);

MOV(32, R(RSCRATCH2), PPCSTATE(fpscr));
AND(32, R(RSCRATCH), Imm32(mask));
@@ -0,0 +1,284 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <cstddef>

#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/RegCache/RCMode.h"

using preg_t = size_t;

class PPCCachedReg
{
public:
enum class LocationType
{
/// Value is currently at its default location
Default,
/// Value is currently bound to a x64 register
Bound,
/// Value is known as an immediate and has not been written back to its default location
Immediate,
/// Value is known as an immediate and is already present at its default location
SpeculativeImmediate,
};

PPCCachedReg() = default;

explicit PPCCachedReg(Gen::OpArg default_location_)
: default_location(default_location_), location(default_location_)
{
}

const Gen::OpArg& Location() const { return location; }

LocationType GetLocationType() const
{
if (!away)
{
ASSERT(!revertable);

if (location.IsImm())
return LocationType::SpeculativeImmediate;

ASSERT(location == default_location);
return LocationType::Default;
}

ASSERT(location.IsImm() || location.IsSimpleReg());
return location.IsImm() ? LocationType::Immediate : LocationType::Bound;
}

bool IsAway() const { return away; }
bool IsBound() const { return GetLocationType() == LocationType::Bound; }

void SetBoundTo(Gen::X64Reg xreg)
{
away = true;
location = Gen::R(xreg);
}

void SetFlushed()
{
ASSERT(!revertable);
away = false;
location = default_location;
}

void SetToImm32(u32 imm32, bool dirty = true)
{
away |= dirty;
location = Gen::Imm32(imm32);
}

bool IsRevertable() const { return revertable; }
void SetRevertable()
{
ASSERT(IsBound());
revertable = true;
}
void SetRevert()
{
ASSERT(revertable);
revertable = false;
SetFlushed();
}
void SetCommit()
{
ASSERT(revertable);
revertable = false;
}

bool IsLocked() const { return locked > 0; }
void Lock() { locked++; }
void Unlock()
{
ASSERT(IsLocked());
locked--;
}

private:
Gen::OpArg default_location{};
Gen::OpArg location{};
bool away = false; // value not in source register
bool revertable = false;
size_t locked = 0;
};

class X64CachedReg
{
public:
preg_t Contents() const { return ppcReg; }

void SetBoundTo(preg_t ppcReg_, bool dirty_)
{
free = false;
ppcReg = ppcReg_;
dirty = dirty_;
}

void SetFlushed()
{
ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
free = true;
dirty = false;
}

bool IsFree() const { return free && !locked; }

bool IsDirty() const { return dirty; }
void MakeDirty() { dirty = true; }

bool IsLocked() const { return locked > 0; }
void Lock() { locked++; }
void Unlock()
{
ASSERT(IsLocked());
locked--;
}

private:
preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
bool free = true;
bool dirty = false;
size_t locked = 0;
};

// Accumulates the requirements placed on one PPC register by the scoped
// RCOpArg/RCX64Reg handles created for an instruction, until the register
// cache "realizes" them (decides where the value will actually live).
class RCConstraint
{
public:
  // True once the cache has assigned a concrete location.
  bool IsRealized() const { return realized != RealizedLoc::Invalid; }
  // True if any constraint has been recorded (or already realized).
  bool IsActive() const
  {
    return IsRealized() || write || read || kill_imm || kill_mem || revertable;
  }

  // Queries used by the cache when realizing the constraint.
  bool ShouldLoad() const { return read; }
  bool ShouldDirty() const { return write; }
  bool ShouldBeRevertable() const { return revertable; }
  bool ShouldKillImmediate() const { return kill_imm; }
  bool ShouldKillMemory() const { return kill_mem; }

  // Where the value ended up once realized.
  enum class RealizedLoc
  {
    Invalid,
    Bound,
    Imm,
    Mem,
  };

  // Record the location the cache chose for this register.
  void Realized(RealizedLoc loc)
  {
    realized = loc;
    ASSERT(IsRealized());
  }

  // Where a caller requires the value to be allowed to live.
  enum class ConstraintLoc
  {
    Bound,
    BoundOrImm,
    BoundOrMem,
    Any,
  };

  // Entry points used by RegCache::Use/UseNoImm/BindOrImm/Bind/RevertableBind.
  void AddUse(RCMode mode) { AddConstraint(mode, ConstraintLoc::Any, false); }
  void AddUseNoImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrMem, false); }
  void AddBindOrImm(RCMode mode) { AddConstraint(mode, ConstraintLoc::BoundOrImm, false); }
  void AddBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, false); }
  void AddRevertableBind(RCMode mode) { AddConstraint(mode, ConstraintLoc::Bound, true); }

private:
  // Merge a new requirement into the pending set. If the constraint is
  // already realized, the new requirement must be satisfiable by the
  // already-chosen location — it is checked, not re-applied.
  void AddConstraint(RCMode mode, ConstraintLoc loc, bool should_revertable)
  {
    if (IsRealized())
    {
      ASSERT(IsCompatible(mode, loc, should_revertable));
      return;
    }

    if (should_revertable)
      revertable = true;

    // Narrowing the allowed location set: forbidding immediates and/or
    // leaving the value in memory, depending on what the caller needs.
    switch (loc)
    {
    case ConstraintLoc::Bound:
      kill_imm = true;
      kill_mem = true;
      break;
    case ConstraintLoc::BoundOrImm:
      kill_mem = true;
      break;
    case ConstraintLoc::BoundOrMem:
      kill_imm = true;
      break;
    case ConstraintLoc::Any:
      break;
    }

    switch (mode)
    {
    case RCMode::Read:
      read = true;
      break;
    case RCMode::Write:
      write = true;
      break;
    case RCMode::ReadWrite:
      read = true;
      write = true;
      break;
    }
  }

  // Whether an additional requirement is satisfied by the realized state.
  bool IsCompatible(RCMode mode, ConstraintLoc loc, bool should_revertable) const
  {
    if (should_revertable && !revertable)
    {
      return false;
    }

    // The realized location must be one of those the new requirement allows.
    const bool is_loc_compatible = [&] {
      switch (loc)
      {
      case ConstraintLoc::Bound:
        return realized == RealizedLoc::Bound;
      case ConstraintLoc::BoundOrImm:
        return realized == RealizedLoc::Bound || realized == RealizedLoc::Imm;
      case ConstraintLoc::BoundOrMem:
        return realized == RealizedLoc::Bound || realized == RealizedLoc::Mem;
      case ConstraintLoc::Any:
        return true;
      }
      ASSERT(false);
      return false;
    }();

    // The recorded access mode must cover the newly requested one.
    const bool is_mode_compatible = [&] {
      switch (mode)
      {
      case RCMode::Read:
        return read;
      case RCMode::Write:
        return write;
      case RCMode::ReadWrite:
        return read && write;
      }
      ASSERT(false);
      return false;
    }();

    return is_loc_compatible && is_mode_compatible;
  }

  RealizedLoc realized = RealizedLoc::Invalid;
  bool write = false;
  bool read = false;
  bool kill_imm = false;  // value may not stay as an immediate
  bool kill_mem = false;  // value may not stay in memory
  bool revertable = false;
};
@@ -2,7 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "Core/PowerPC/Jit64/FPURegCache.h"
#include "Core/PowerPC/Jit64/RegCache/FPURegCache.h"

#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
@@ -4,17 +4,17 @@

#pragma once

#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"

class Jit64;

class FPURegCache final : public RegCache
{
public:
explicit FPURegCache(Jit64& jit);
Gen::OpArg GetDefaultLocation(preg_t preg) const override;

protected:
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override;
void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) const override;
@@ -2,7 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "Core/PowerPC/Jit64/GPRRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/GPRRegCache.h"

#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
@@ -4,18 +4,18 @@

#pragma once

#include "Core/PowerPC/Jit64/JitRegCache.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"

class Jit64;

class GPRRegCache final : public RegCache
{
public:
explicit GPRRegCache(Jit64& jit);
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void SetImmediate32(preg_t preg, u32 imm_value, bool dirty = true);

protected:
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override;
void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) const override;

Large diffs are not rendered by default.

@@ -0,0 +1,222 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <cinttypes>
#include <cstddef>
#include <type_traits>
#include <variant>

#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/RegCache/CachedReg.h"
#include "Core/PowerPC/PPCAnalyst.h"

class Jit64;
enum class RCMode;

class RCOpArg;
class RCX64Reg;
class RegCache;

// Index of a guest PowerPC register (0..31; see the 32-entry arrays below).
using preg_t = size_t;
// Number of host x64 registers tracked by the cache (size of m_xregs).
static constexpr size_t NUM_XREGS = 16;

// Move-only handle to an operand managed by RegCache: either a cached PPC
// register (preg_t), a bare x64 register, a 32-bit immediate, or empty
// (monostate) — see the variant member below. Holding one keeps the
// underlying cache entry locked until destruction or Unlock().
class RCOpArg
{
public:
// Wrap a plain 32-bit immediate; not associated with any RegCache.
static RCOpArg Imm32(u32 imm);
// Wrap a fixed x64 register directly; not associated with any RegCache.
static RCOpArg R(Gen::X64Reg xr);
RCOpArg();
~RCOpArg();
RCOpArg(RCOpArg&&) noexcept;
RCOpArg& operator=(RCOpArg&&) noexcept;

// An RCX64Reg can be moved into an RCOpArg, transferring its lock.
RCOpArg(RCX64Reg&&) noexcept;
RCOpArg& operator=(RCX64Reg&&) noexcept;

// Non-copyable: the lock on the cache entry has a single owner.
RCOpArg(const RCOpArg&) = delete;
RCOpArg& operator=(const RCOpArg&) = delete;

// Resolve this handle with the owning RegCache so Location() is valid.
void Realize();
Gen::OpArg Location() const;
// Lvalue-only conversion: converting from a temporary would hand out an
// OpArg that outlives its owning handle, so the rvalue overload is deleted.
operator Gen::OpArg() const & { return Location(); }
operator Gen::OpArg() const && = delete;
bool IsSimpleReg() const { return Location().IsSimpleReg(); }
bool IsSimpleReg(Gen::X64Reg reg) const { return Location().IsSimpleReg(reg); }
Gen::X64Reg GetSimpleReg() const { return Location().GetSimpleReg(); }

// Use to extract bytes from a register using the regcache. offset is in bytes.
Gen::OpArg ExtractWithByteOffset(int offset);

// Release the lock early, before destruction.
void Unlock();

bool IsImm() const;
s32 SImm32() const;
u32 Imm32() const;
bool IsZero() const { return IsImm() && Imm32() == 0; }

private:
friend class RegCache;

// Private constructors: cache-managed handles are created by RegCache,
// immediates/fixed registers by the static factories above.
explicit RCOpArg(u32 imm);
explicit RCOpArg(Gen::X64Reg xr);
RCOpArg(RegCache* rc_, preg_t preg);

// Owning cache; null for immediates and bare x64 registers.
RegCache* rc = nullptr;
std::variant<std::monostate, Gen::X64Reg, u32, preg_t> contents;
};

// Move-only handle to an x64 host register obtained from RegCache — either
// one bound to a PPC register (preg_t) or a scratch register. Stronger than
// RCOpArg in that it always denotes a register, never memory or an immediate.
class RCX64Reg
{
public:
RCX64Reg();
~RCX64Reg();
RCX64Reg(RCX64Reg&&) noexcept;
RCX64Reg& operator=(RCX64Reg&&) noexcept;

// Non-copyable: the lock on the register has a single owner.
RCX64Reg(const RCX64Reg&) = delete;
RCX64Reg& operator=(const RCX64Reg&) = delete;

// Resolve this handle with the owning RegCache before use.
void Realize();
// Lvalue-only conversions (rvalue overloads deleted): the produced
// OpArg/X64Reg must not outlive this handle.
operator Gen::OpArg() const &;
operator Gen::X64Reg() const &;
operator Gen::OpArg() const && = delete;
operator Gen::X64Reg() const && = delete;

// Release the lock early, before destruction.
void Unlock();

private:
friend class RegCache;
friend class RCOpArg;

// Created only by RegCache (Bind/Scratch) — see constructors' two forms:
// a cache-managed preg or a fixed x64 register.
RCX64Reg(RegCache* rc_, preg_t preg);
RCX64Reg(RegCache* rc_, Gen::X64Reg xr);

RegCache* rc = nullptr;
std::variant<std::monostate, Gen::X64Reg, preg_t> contents;
};

// RAII snapshot of a RegCache's state (copies of m_regs/m_xregs below),
// created via RegCache::Fork(). Used around conditional exits: code inside
// the fork may flush registers for a branch-taken path, and EndFork() (run
// at scope exit by the destructor) returns the cache to the forked state so
// the fall-through path continues with the original allocation.
class RCForkGuard
{
public:
~RCForkGuard() { EndFork(); }
RCForkGuard(RCForkGuard&&) noexcept;

// Movable but not copyable or move-assignable: exactly one guard owns the
// responsibility of restoring the snapshot.
RCForkGuard(const RCForkGuard&) = delete;
RCForkGuard& operator=(const RCForkGuard&) = delete;
RCForkGuard& operator=(RCForkGuard&&) = delete;

// Restore the cache to the snapshot taken at construction. Safe to call
// explicitly before destruction.
void EndFork();

private:
friend class RegCache;

explicit RCForkGuard(RegCache& rc_);

RegCache* rc;
// Saved copies of the cache's register state at Fork() time.
std::array<PPCCachedReg, 32> m_regs;
std::array<X64CachedReg, NUM_XREGS> m_xregs;
};

// Abstract register cache mapping the 32 guest PPC registers onto host x64
// registers for the Jit64 backend. Concrete subclasses (GPR/FPU caches)
// supply storage locations and allocation order via the pure virtuals below.
// Callers obtain RCOpArg/RCX64Reg handles through Use/Bind/Scratch and must
// Realize() them before emitting code that touches the operands.
class RegCache
{
public:
enum class FlushMode
{
// Write back and discard all cached state.
Full,
// Write back values but keep the cache's bookkeeping intact.
MaintainState,
};

explicit RegCache(Jit64& jit);
virtual ~RegCache() = default;

void Start();
void SetEmitter(Gen::XEmitter* emitter);
bool SanityCheck() const;

// Realize several handles at once; accepts any mix of RCOpArg and RCX64Reg
// (enforced by the static_assert), applied left to right via fold.
template <typename... Ts>
static void Realize(Ts&... rc)
{
static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...));
(rc.Realize(), ...);
}

// Unlock several handles at once; same constraints as Realize().
template <typename... Ts>
static void Unlock(Ts&... rc)
{
static_assert(((std::is_same<Ts, RCOpArg>() || std::is_same<Ts, RCX64Reg>()) && ...));
(rc.Unlock(), ...);
}

// True iff every listed preg currently holds an immediate.
template <typename... Args>
bool IsImm(Args... pregs) const
{
static_assert(sizeof...(pregs) > 0);
return (R(pregs).IsImm() && ...);
}
u32 Imm32(preg_t preg) const { return R(preg).Imm32(); }
s32 SImm32(preg_t preg) const { return R(preg).SImm32(); }

// Handle factories. 'mode' declares the intended access (read/write/both).
RCOpArg Use(preg_t preg, RCMode mode);
// Like Use, but never yields an immediate operand.
RCOpArg UseNoImm(preg_t preg, RCMode mode);
// Either binds to a register or yields the immediate directly.
RCOpArg BindOrImm(preg_t preg, RCMode mode);
// Guarantees the preg ends up in a host register.
RCX64Reg Bind(preg_t preg, RCMode mode);
// Bind whose effects can be undone via Revert() (see revertable state).
RCX64Reg RevertableBind(preg_t preg, RCMode mode);
// Temporary host register not tied to any preg.
RCX64Reg Scratch();
RCX64Reg Scratch(Gen::X64Reg xr);

// Snapshot current state; the returned guard restores it on destruction.
RCForkGuard Fork();
// Write cached values back to ppcState for the given pregs (default: all).
void Flush(BitSet32 pregs = BitSet32::AllTrue(32));
void Revert();
void Commit();

bool IsAllUnlocked() const;

void PreloadRegisters(BitSet32 pregs);
BitSet32 RegistersInUse() const;

protected:
friend class RCOpArg;
friend class RCX64Reg;
friend class RCForkGuard;

// Subclass interface: where a preg lives in ppcState and how to move it.
virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0;
virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0;
virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0;

// Preferred host-register allocation order; *count receives its length.
virtual const Gen::X64Reg* GetAllocationOrder(size_t* count) const = 0;

// Usage statistics consulted when choosing a register to evict.
virtual BitSet32 GetRegUtilization() const = 0;
virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0;

void FlushX(Gen::X64Reg reg);
void DiscardRegContentsIfCached(preg_t preg);
void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true);
void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::Full);

Gen::X64Reg GetFreeXReg();

int NumFreeRegisters() const;
// Eviction heuristic: scores how costly it would be to spill xreg.
float ScoreRegister(Gen::X64Reg xreg) const;

const Gen::OpArg& R(preg_t preg) const;
Gen::X64Reg RX(preg_t preg) const;

// Lock bookkeeping used by RCOpArg/RCX64Reg handle lifetimes.
void Lock(preg_t preg);
void Unlock(preg_t preg);
void LockX(Gen::X64Reg xr);
void UnlockX(Gen::X64Reg xr);
bool IsRealized(preg_t preg) const;
void Realize(preg_t preg);

bool IsAnyConstraintActive() const;

Jit64& m_jit;
// Per-PPC-register and per-x64-register cached state.
std::array<PPCCachedReg, 32> m_regs;
std::array<X64CachedReg, NUM_XREGS> m_xregs;
// Pending constraints accumulated by handles until Realize().
std::array<RCConstraint, 32> m_constraints;
Gen::XEmitter* m_emitter = nullptr;
};
@@ -0,0 +1,12 @@
// Copyright 2018 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

// Declared access intent for a register handle requested from RegCache
// (passed to Use/Bind and friends).
enum class RCMode
{
Read,
Write,
ReadWrite,
};
@@ -70,11 +70,6 @@ class JitBase : public CPUCoreBase
// so just fixup that branch instead of testing for a DSI again.
bool fixupExceptionHandler;
Gen::FixupBranch exceptionHandler;
// If these are set, we've stored the old value of a register which will be loaded in
// revertLoad,
// which lets us revert it on the exception path.
int revertGprLoad;
int revertFprLoad;

bool assumeNoPairedQuantize;
std::map<u8, u32> constantGqr;