96 changes: 48 additions & 48 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -27,9 +27,9 @@ using namespace Arm64Gen;
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{
// We want to make sure to not get LR as a temp register
- gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
- if (!jo.fastmem)
- gpr.Lock(ARM64Reg::W2);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
+ if (jo.memcheck || !jo.fastmem)
+ gpr.Lock(ARM64Reg::W0);

gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest);
@@ -42,7 +42,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (offsetReg != -1 && !gpr.IsImm(offsetReg))
off_reg = gpr.R(offsetReg);

- ARM64Reg addr_reg = ARM64Reg::W0;
+ ARM64Reg addr_reg = ARM64Reg::W1;
u32 imm_addr = 0;
bool is_immediate = false;

@@ -123,9 +123,9 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
+ regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+ if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
- if (!jo.fastmem)
- regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;

@@ -142,7 +142,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
else if (mmio_address)
{
- regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+ regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
regs_in_use[DecodeReg(dest_reg)] = 0;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use,
@@ -165,18 +165,18 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOV(gpr.R(addr), addr_reg);
}

- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
- if (!jo.fastmem)
- gpr.Unlock(ARM64Reg::W2);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
+ if (jo.memcheck || !jo.fastmem)
+ gpr.Unlock(ARM64Reg::W0);
}
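The register shuffle in this function (and the ones below) falls out of the unified C++ fallbacks declared in MMU.h at the end of this diff: they now take `MMU&` as their first parameter, which under AAPCS64 pins the MMU pointer to X0 and pushes the address into W1 for loads. A minimal sketch of the convention, with the thunk signature taken from MMU.h and the call shape otherwise illustrative:

```cpp
// Load-path register roles after this change (sketch, assuming AAPCS64):
//   W0  - temp; becomes the MMU& argument when the slowmem fallback is taken
//   W1  - effective address (the first data argument after MMU&)
//   W30 - LR; locked so the register cache never hands it out as a temp
MOVP2R(ARM64Reg::X0, &m_mmu);   // MMU& -> X0
// ... address already computed into W1 ...
BL(read_u32_fallback);          // u32 ReadU32FromJit(MMU&, u32 address); label is illustrative
MOV(dest_reg, ARM64Reg::W0);    // AAPCS64 return value comes back in W0
```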

void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
bool update)
{
// We want to make sure to not get LR as a temp register
- gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
- gpr.Lock(ARM64Reg::W2);
+ gpr.Lock(ARM64Reg::W0);

ARM64Reg RS = gpr.R(value);

@@ -188,7 +188,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
if (dest != -1 && !gpr.IsImm(dest))
reg_dest = gpr.R(dest);

- ARM64Reg addr_reg = ARM64Reg::W1;
+ ARM64Reg addr_reg = ARM64Reg::W2;

u32 imm_addr = 0;
bool is_immediate = false;
@@ -269,11 +269,11 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
- regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+ regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!update || early_update)
- regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
- if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
+ if (!jo.fastmem)
+ regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;

u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@@ -290,19 +290,19 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else
accessSize = 8;

- LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+ LDR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));

ARM64Reg temp = ARM64Reg::W1;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);

if (accessSize == 32)
- STR(IndexType::Post, temp, ARM64Reg::X0, 4);
+ STR(IndexType::Post, temp, ARM64Reg::X2, 4);
else if (accessSize == 16)
- STRH(IndexType::Post, temp, ARM64Reg::X0, 2);
+ STRH(IndexType::Post, temp, ARM64Reg::X2, 2);
else
- STRB(IndexType::Post, temp, ARM64Reg::X0, 1);
+ STRB(IndexType::Post, temp, ARM64Reg::X2, 1);

- STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+ STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));

js.fifoBytesSinceCheck += accessSize >> 3;
}
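The gather-pipe fast path above now runs its pointer through X2 (the store path's address register) instead of X0, keeping W0/X0 free for the `MMU&` argument on the non-fastmem path. What the emitted sequence does, as plain C++ (a sketch; the field access is spelled out from `PPCSTATE_OFF(gather_pipe_ptr)`):

```cpp
u8* pipe = ppcState.gather_pipe_ptr;           // LDR X2 <- gather_pipe_ptr
const u32 swapped = Common::swap32(value);     // ByteswapBeforeStore: the FIFO wants big-endian
std::memcpy(pipe, &swapped, sizeof(swapped));  // post-indexed STR writes the word...
pipe += sizeof(swapped);                       // ...and advances X2 in the same instruction
ppcState.gather_pipe_ptr = pipe;               // STR X2 -> gather_pipe_ptr
```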
@@ -313,8 +313,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
else if (mmio_address)
{
- regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+ regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
regs_in_use[DecodeReg(RS)] = 0;
MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
@@ -334,9 +334,9 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg);
}

- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
- gpr.Unlock(ARM64Reg::W2);
+ gpr.Unlock(ARM64Reg::W0);
}

FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@@ -518,13 +518,13 @@ void JitArm64::lmw(UGeckoInstruction inst)
u32 a = inst.RA, d = inst.RD;
s32 offset = inst.SIMM_16;

- gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
- if (!jo.fastmem)
- gpr.Lock(ARM64Reg::W2);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
+ if (jo.memcheck || !jo.fastmem)
+ gpr.Lock(ARM64Reg::W0);

// MMU games make use of a >= d despite this being invalid according to the PEM.
// If a >= d occurs, we must make sure to not re-read rA after starting doing the loads.
- ARM64Reg addr_reg = ARM64Reg::W0;
+ ARM64Reg addr_reg = ARM64Reg::W1;
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
@@ -554,8 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(addr_reg)] = 0;
- if (!jo.fastmem)
- regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
+ if (jo.memcheck || !jo.fastmem)
+ regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;

@@ -566,9 +566,9 @@ void JitArm64::lmw(UGeckoInstruction inst)
ASSERT(dest_reg == gpr.R(i));
}

- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
- if (!jo.fastmem)
- gpr.Unlock(ARM64Reg::W2);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
+ if (jo.memcheck || !jo.fastmem)
+ gpr.Unlock(ARM64Reg::W0);
if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg);
}
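For context on the loop this function emits: `lmw` fills a run of GPRs from consecutive words, which is why `dest_reg` walks from `d` up to r31 while the address advances. Reference semantics per the PEM (a sketch; `GPR`/`Read_U32` are illustrative helpers):

```cpp
// lmw rD, SIMM(rA): load rD..r31 from consecutive words at EA.
u32 ea = (a ? GPR(a) : 0) + offset;  // offset == inst.SIMM_16
for (u32 i = d; i < 32; i++, ea += 4)
  GPR(i) = Read_U32(ea);
```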
@@ -581,11 +581,11 @@ void JitArm64::stmw(UGeckoInstruction inst)
u32 a = inst.RA, s = inst.RS;
s32 offset = inst.SIMM_16;

- gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
- gpr.Lock(ARM64Reg::W2);
+ gpr.Lock(ARM64Reg::W0);

- ARM64Reg addr_reg = ARM64Reg::W1;
+ ARM64Reg addr_reg = ARM64Reg::W2;
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
@@ -613,18 +613,18 @@ void JitArm64::stmw(UGeckoInstruction inst)

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
- regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+ regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem)
- regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
+ regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);
}

- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
- gpr.Unlock(ARM64Reg::W2);
+ gpr.Unlock(ARM64Reg::W0);
if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg);
}
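`stmw` is the store-side mirror of `lmw` above (same illustrative helpers):

```cpp
// stmw rS, SIMM(rA): store rS..r31 to consecutive words at EA.
u32 ea = (a ? GPR(a) : 0) + offset;
for (u32 i = s; i < 32; i++, ea += 4)
  Write_U32(GPR(i), ea);
```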
@@ -821,17 +821,17 @@ void JitArm64::dcbz(UGeckoInstruction inst)

int a = inst.RA, b = inst.RB;

- gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem)
- gpr.Lock(ARM64Reg::W2);
+ gpr.Lock(ARM64Reg::W0);

Common::ScopeGuard register_guard([&] {
- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem)
- gpr.Unlock(ARM64Reg::W2);
+ gpr.Unlock(ARM64Reg::W0);
});

- constexpr ARM64Reg addr_reg = ARM64Reg::W0;
+ constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg temp_reg = ARM64Reg::W30;

// HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region.
@@ -895,11 +895,11 @@ void JitArm64::dcbz(UGeckoInstruction inst)

BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
- gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
+ gprs_to_push[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem)
- gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
+ gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;

- EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
+ EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);

if (using_dcbz_hack)
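For reference, `dcbz` zeroes one 32-byte data cache line, which is exactly what the `FLAG_ZERO_256` backpatch routine emits (256 bits = 32 bytes). The architectural effect, sketched with an illustrative `host_ptr_for` translation helper:

```cpp
// dcbz rA, rB: zero the cache line containing EA.
u32 ea = (a ? GPR(a) : 0) + GPR(b);
ea &= ~31u;                             // align down to the 32-byte line
std::memset(host_ptr_for(ea), 0, 32);   // what FLAG_ZERO_256 boils down to
```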
40 changes: 20 additions & 20 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -77,13 +77,13 @@ void JitArm64::lfXX(UGeckoInstruction inst)
const RegType type =
(flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;

- gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
- if (!jo.fastmem)
- gpr.Lock(ARM64Reg::W2);
+ if (jo.memcheck || !jo.fastmem)
+ gpr.Lock(ARM64Reg::W0);

const ARM64Reg VD = fpr.RW(inst.FD, type, false);
- ARM64Reg addr_reg = ARM64Reg::W0;
+ ARM64Reg addr_reg = ARM64Reg::W1;

if (update)
{
@@ -167,9 +167,9 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
+ regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+ if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
- if (!jo.fastmem)
- regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0;
@@ -192,10 +192,10 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}

- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
- if (!jo.fastmem)
- gpr.Unlock(ARM64Reg::W2);
+ if (jo.memcheck || !jo.fastmem)
+ gpr.Unlock(ARM64Reg::W0);
}

void JitArm64::stfXX(UGeckoInstruction inst)
@@ -278,11 +278,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
V0 = single_reg;
}

- gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
- gpr.Lock(ARM64Reg::W2);
+ gpr.Lock(ARM64Reg::W0);

- ARM64Reg addr_reg = ARM64Reg::W1;
+ ARM64Reg addr_reg = ARM64Reg::W2;

if (update)
{
@@ -369,11 +369,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
- regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+ regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!update || early_update)
- regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
- if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
+ if (!jo.fastmem)
+ regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;

if (is_immediate)
@@ -386,17 +386,17 @@ void JitArm64::stfXX(UGeckoInstruction inst)
else
accessSize = 32;

- LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+ LDR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));

if (flags & BackPatchInfo::FLAG_SIZE_64)
m_float_emit.REV64(8, ARM64Reg::Q0, V0);
else if (flags & BackPatchInfo::FLAG_SIZE_32)
m_float_emit.REV32(8, ARM64Reg::D0, V0);

m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0,
- ARM64Reg::X0, accessSize >> 3);
+ ARM64Reg::X2, accessSize >> 3);

- STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+ STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3;
}
else if (m_mmu.IsOptimizableRAMAddress(imm_addr))
@@ -428,8 +428,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
if (want_single && !have_single)
fpr.Unlock(V0);

- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem)
- gpr.Unlock(ARM64Reg::W2);
+ gpr.Unlock(ARM64Reg::W0);
}
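Same story as the integer gather-pipe path: the FIFO consumes big-endian data, so the REV64/REV32 above swaps the double or single in the vector register before the post-indexed store, which now goes through X2. Roughly, for the 64-bit case (sketch):

```cpp
u64 bits;
std::memcpy(&bits, &value, sizeof(bits));  // 'value' is the double being stored
bits = Common::swap64(bits);               // REV64 on the low lane of Q0
std::memcpy(pipe, &bits, sizeof(bits));    // post-indexed STR via X2
pipe += sizeof(bits);
```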
50 changes: 25 additions & 25 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -37,21 +37,21 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W;

- gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
- gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
+ gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1);
}
- else if (!jo.fastmem)
+ else if (jo.memcheck || !jo.fastmem)
{
- gpr.Lock(ARM64Reg::W2);
+ gpr.Lock(ARM64Reg::W0);
}

- constexpr ARM64Reg addr_reg = ARM64Reg::W0;
- constexpr ARM64Reg scale_reg = ARM64Reg::W1;
- constexpr ARM64Reg type_reg = ARM64Reg::W2;
+ constexpr ARM64Reg type_reg = ARM64Reg::W0;
+ constexpr ARM64Reg addr_reg = ARM64Reg::W1;
+ constexpr ARM64Reg scale_reg = ARM64Reg::W2;
ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false);

if (inst.RA || update) // Always uses the register on update
@@ -85,9 +85,9 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)

// Wipe the registers we are using as temporaries
if (!update || early_update)
+ gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+ if (jo.memcheck || !jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
- if (!jo.fastmem)
- gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0;
@@ -134,16 +134,16 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg);
}

- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
- gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
+ gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1);
}
- else if (!jo.fastmem)
+ else if (jo.memcheck || !jo.fastmem)
{
- gpr.Unlock(ARM64Reg::W2);
+ gpr.Unlock(ARM64Reg::W0);
}
}
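When `assumeNoPairedQuantize` is false, the routine dispatched to here (indexed by `type_reg`, now W0) performs the actual dequantization. Conceptually, per loaded element (a sketch; the table accessor is illustrative, and the exact convert/narrow sequence lives in the generated routines in JitAsm.cpp):

```cpp
// psq_l dequantization: integer -> float, scaled by the per-GQR factor.
float dequantize(s32 raw, u32 scale)
{
  // m_dequantizeTableS holds one power-of-two factor per scale value.
  return static_cast<float>(raw) * dequantize_table_entry(scale);
}
```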

@@ -203,15 +203,15 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}
}

- gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
- if (!js.assumeNoPairedQuantize || !jo.fastmem)
- gpr.Lock(ARM64Reg::W2);
+ gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
+ if (!js.assumeNoPairedQuantize || jo.memcheck || !jo.fastmem)
+ gpr.Lock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Lock(ARM64Reg::W3);

- constexpr ARM64Reg scale_reg = ARM64Reg::W0;
- constexpr ARM64Reg addr_reg = ARM64Reg::W1;
- constexpr ARM64Reg type_reg = ARM64Reg::W2;
+ constexpr ARM64Reg type_reg = ARM64Reg::W0;
+ constexpr ARM64Reg scale_reg = ARM64Reg::W1;
+ constexpr ARM64Reg addr_reg = ARM64Reg::W2;

if (inst.RA || update) // Always uses the register on update
{
@@ -243,11 +243,11 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();

// Wipe the registers we are using as temporaries
- gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+ gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!update || early_update)
- gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
- if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
+ if (!jo.fastmem)
+ gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;

u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
@@ -283,10 +283,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize && !have_single)
fpr.Unlock(VS);

- gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+ gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
- if (!js.assumeNoPairedQuantize || !jo.fastmem)
- gpr.Unlock(ARM64Reg::W2);
+ if (!js.assumeNoPairedQuantize || jo.memcheck || !jo.fastmem)
+ gpr.Unlock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Unlock(ARM64Reg::W3);
if (!js.assumeNoPairedQuantize)
98 changes: 50 additions & 48 deletions Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -502,18 +502,19 @@ void JitArm64::GenerateFPRF(bool single)

void JitArm64::GenerateQuantizedLoads()
{
- // X0 is the address
- // X1 is the scale
- // X2 is a temporary
+ // X0 is a temporary
+ // X1 is the address
+ // X2 is the scale
// X3 is a temporary (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the return
// Q1 is a temporary
- ARM64Reg addr_reg = ARM64Reg::X0;
- ARM64Reg scale_reg = ARM64Reg::X1;
- BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{2, 3};
+ ARM64Reg temp_reg = ARM64Reg::X0;
+ ARM64Reg addr_reg = ARM64Reg::X1;
+ ARM64Reg scale_reg = ARM64Reg::X2;
+ BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3};
if (!jo.memcheck)
- gprs_to_push &= ~BitSet32{0};
+ gprs_to_push &= ~BitSet32{1};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this);
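A note on the table addressing used by every routine below: `MOVPage2R` materializes the page base of the table address and returns the residual low bits, which are folded into the `LDR` immediate, while the `ADD ... LSL #3` steps 8 bytes per scale entry. As address arithmetic (a sketch, assuming 4 KiB ADRP pages):

```cpp
const uintptr_t table = reinterpret_cast<uintptr_t>(&m_dequantizeTableS);
const uintptr_t base = table & ~uintptr_t{0xFFF};  // MOVPage2R(temp_reg, &m_dequantizeTableS)
const s32 load_offset = table & 0xFFF;             // the returned residual
const uintptr_t entry = base + scale * 8;          // ADD(scale_reg, temp_reg, scale_reg, LSL #3)
// LDR(32, ..., D1, scale_reg, load_offset) then reads base + scale * 8 + load_offset
```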

@@ -526,7 +527,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
- gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
+ gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -542,8 +543,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);

- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(ARM64Reg::X0, &m_dequantizeTableS);
+ ADD(scale_reg, ARM64Reg::X0, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@@ -560,8 +561,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);

- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@@ -577,8 +578,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);

- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@@ -594,8 +595,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);

- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@@ -607,7 +608,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
- gprs_to_push & ~BitSet32{1}, fprs_to_push, true);
+ gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -623,8 +624,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);

- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@@ -641,8 +642,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);

- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@@ -658,8 +659,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);

- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@@ -675,8 +676,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);

- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_dequantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@@ -711,18 +712,19 @@ void JitArm64::GenerateQuantizedLoads()

void JitArm64::GenerateQuantizedStores()
{
- // X0 is the scale
- // X1 is the address
- // X2 is a temporary
+ // X0 is a temporary
+ // X1 is the scale
+ // X2 is the address
// X3 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the register
// Q1 is a temporary
- ARM64Reg scale_reg = ARM64Reg::X0;
- ARM64Reg addr_reg = ARM64Reg::X1;
- BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
+ ARM64Reg temp_reg = ARM64Reg::X0;
+ ARM64Reg scale_reg = ARM64Reg::X1;
+ ARM64Reg addr_reg = ARM64Reg::X2;
+ BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1};
if (!jo.memcheck)
- gprs_to_push &= ~BitSet32{1};
+ gprs_to_push &= ~BitSet32{2};
if (!jo.fastmem)
gprs_to_push &= ~BitSet32{3};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
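The store routines below are the inverse of the loads: multiply by the `m_quantizeTableS` entry, then convert and saturate to the target width. Per element, conceptually (a sketch; the table accessor is illustrative and the convert/narrow instructions themselves are collapsed out of this diff):

```cpp
u8 quantize_to_u8(float v, u32 scale)
{
  const float scaled = v * quantize_table_entry(scale);      // the FMUL in each block
  return static_cast<u8>(std::clamp(scaled, 0.0f, 255.0f));  // truncate + clamp to range
}
```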
@@ -743,8 +745,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedU8 = GetCodePtr();
{
- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);

@@ -762,8 +764,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedS8 = GetCodePtr();
{
- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);

@@ -781,8 +783,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedU16 = GetCodePtr();
{
- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);

@@ -799,8 +801,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie
{
- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);

@@ -828,8 +830,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
{
- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);

@@ -847,8 +849,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleS8 = GetCodePtr();
{
- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);

@@ -866,8 +868,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
{
- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);

@@ -884,8 +886,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleS16 = GetCodePtr();
{
- const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
- ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
+ const s32 load_offset = MOVPage2R(temp_reg, &m_quantizeTableS);
+ ADD(scale_reg, temp_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);

73 changes: 12 additions & 61 deletions Source/Core/Core/PowerPC/MMU.cpp
@@ -1681,100 +1681,51 @@ std::optional<u32> MMU::GetTranslatedAddress(u32 address)
return std::optional<u32>(result.address);
}

- void ClearDCacheLineFromJit64(MMU& mmu, u32 address)
+ void ClearDCacheLineFromJit(MMU& mmu, u32 address)
{
mmu.ClearDCacheLine(address);
}
- u32 ReadU8ZXFromJit64(MMU& mmu, u32 address)
+ u32 ReadU8FromJit(MMU& mmu, u32 address)
{
return mmu.Read_U8(address);
}
- u32 ReadU16ZXFromJit64(MMU& mmu, u32 address)
+ u32 ReadU16FromJit(MMU& mmu, u32 address)
{
return mmu.Read_U16(address);
}
- u32 ReadU32FromJit64(MMU& mmu, u32 address)
+ u32 ReadU32FromJit(MMU& mmu, u32 address)
{
return mmu.Read_U32(address);
}
- u64 ReadU64FromJit64(MMU& mmu, u32 address)
+ u64 ReadU64FromJit(MMU& mmu, u32 address)
{
return mmu.Read_U64(address);
}
- void WriteU8FromJit64(MMU& mmu, u32 var, u32 address)
+ void WriteU8FromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U8(var, address);
}
- void WriteU16FromJit64(MMU& mmu, u32 var, u32 address)
+ void WriteU16FromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U16(var, address);
}
- void WriteU32FromJit64(MMU& mmu, u32 var, u32 address)
+ void WriteU32FromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U32(var, address);
}
- void WriteU64FromJit64(MMU& mmu, u64 var, u32 address)
+ void WriteU64FromJit(MMU& mmu, u64 var, u32 address)
{
mmu.Write_U64(var, address);
}
- void WriteU16SwapFromJit64(MMU& mmu, u32 var, u32 address)
+ void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U16_Swap(var, address);
}
- void WriteU32SwapFromJit64(MMU& mmu, u32 var, u32 address)
+ void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address)
{
mmu.Write_U32_Swap(var, address);
}
- void WriteU64SwapFromJit64(MMU& mmu, u64 var, u32 address)
- {
- mmu.Write_U64_Swap(var, address);
- }
-
- void ClearDCacheLineFromJitArm64(u32 address, MMU& mmu)
- {
- mmu.ClearDCacheLine(address);
- }
- u8 ReadU8FromJitArm64(u32 address, MMU& mmu)
- {
- return mmu.Read_U8(address);
- }
- u16 ReadU16FromJitArm64(u32 address, MMU& mmu)
- {
- return mmu.Read_U16(address);
- }
- u32 ReadU32FromJitArm64(u32 address, MMU& mmu)
- {
- return mmu.Read_U32(address);
- }
- u64 ReadU64FromJitArm64(u32 address, MMU& mmu)
- {
- return mmu.Read_U64(address);
- }
- void WriteU8FromJitArm64(u32 var, u32 address, MMU& mmu)
- {
- mmu.Write_U8(var, address);
- }
- void WriteU16FromJitArm64(u32 var, u32 address, MMU& mmu)
- {
- mmu.Write_U16(var, address);
- }
- void WriteU32FromJitArm64(u32 var, u32 address, MMU& mmu)
- {
- mmu.Write_U32(var, address);
- }
- void WriteU64FromJitArm64(u64 var, u32 address, MMU& mmu)
- {
- mmu.Write_U64(var, address);
- }
- void WriteU16SwapFromJitArm64(u32 var, u32 address, MMU& mmu)
- {
- mmu.Write_U16_Swap(var, address);
- }
- void WriteU32SwapFromJitArm64(u32 var, u32 address, MMU& mmu)
- {
- mmu.Write_U32_Swap(var, address);
- }
- void WriteU64SwapFromJitArm64(u64 var, u32 address, MMU& mmu)
+ void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address)
{
mmu.Write_U64_Swap(var, address);
}
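With the two per-JIT families collapsed into one, both backends now call the same thunks, with `MMU&` first in every signature. On AArch64 that is what makes the new register layout line up: X0 carries the `MMU&`, W1 the address for reads, and W1/W2 the value/address pair for writes. Call shapes, as plain C++ (illustrative):

```cpp
const u32 loaded = PowerPC::ReadU32FromJit(mmu, address);  // mmu -> X0, address -> W1
PowerPC::WriteU32FromJit(mmu, value, address);             // mmu -> X0, value -> W1, address -> W2
```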
40 changes: 12 additions & 28 deletions Source/Core/Core/PowerPC/MMU.h
@@ -328,32 +328,16 @@ class MMU
BatTable m_dbat_table;
};

- void ClearDCacheLineFromJit64(MMU& mmu, u32 address);
- u32 ReadU8ZXFromJit64(MMU& mmu, u32 address); // Returns zero-extended 32bit value
- u32 ReadU16ZXFromJit64(MMU& mmu, u32 address); // Returns zero-extended 32bit value
- u32 ReadU32FromJit64(MMU& mmu, u32 address);
- u64 ReadU64FromJit64(MMU& mmu, u32 address);
- void WriteU8FromJit64(MMU& mmu, u32 var, u32 address);
- void WriteU16FromJit64(MMU& mmu, u32 var, u32 address);
- void WriteU32FromJit64(MMU& mmu, u32 var, u32 address);
- void WriteU64FromJit64(MMU& mmu, u64 var, u32 address);
- void WriteU16SwapFromJit64(MMU& mmu, u32 var, u32 address);
- void WriteU32SwapFromJit64(MMU& mmu, u32 var, u32 address);
- void WriteU64SwapFromJit64(MMU& mmu, u64 var, u32 address);
-
- // The JitArm64 function that calls these has very specific register allocation that's difficult to
- // change, so we have a separate set of functions here for it. This can probably be refactored in
- // the future.
- void ClearDCacheLineFromJitArm64(u32 address, MMU& mmu);
- u8 ReadU8FromJitArm64(u32 address, MMU& mmu);
- u16 ReadU16FromJitArm64(u32 address, MMU& mmu);
- u32 ReadU32FromJitArm64(u32 address, MMU& mmu);
- u64 ReadU64FromJitArm64(u32 address, MMU& mmu);
- void WriteU8FromJitArm64(u32 var, u32 address, MMU& mmu);
- void WriteU16FromJitArm64(u32 var, u32 address, MMU& mmu);
- void WriteU32FromJitArm64(u32 var, u32 address, MMU& mmu);
- void WriteU64FromJitArm64(u64 var, u32 address, MMU& mmu);
- void WriteU16SwapFromJitArm64(u32 var, u32 address, MMU& mmu);
- void WriteU32SwapFromJitArm64(u32 var, u32 address, MMU& mmu);
- void WriteU64SwapFromJitArm64(u64 var, u32 address, MMU& mmu);
+ void ClearDCacheLineFromJit(MMU& mmu, u32 address);
+ u32 ReadU8FromJit(MMU& mmu, u32 address); // Returns zero-extended 32bit value
+ u32 ReadU16FromJit(MMU& mmu, u32 address); // Returns zero-extended 32bit value
+ u32 ReadU32FromJit(MMU& mmu, u32 address);
+ u64 ReadU64FromJit(MMU& mmu, u32 address);
+ void WriteU8FromJit(MMU& mmu, u32 var, u32 address);
+ void WriteU16FromJit(MMU& mmu, u32 var, u32 address);
+ void WriteU32FromJit(MMU& mmu, u32 var, u32 address);
+ void WriteU64FromJit(MMU& mmu, u64 var, u32 address);
+ void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address);
+ void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address);
+ void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address);
} // namespace PowerPC