@@ -27,6 +27,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);

gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest);
@@ -121,6 +123,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;

@@ -129,10 +133,11 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (is_immediate)
mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size);

if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0));
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, dest_reg, XA, regs_in_use,
fprs_in_use);
}
else if (mmio_address)
{
@@ -142,7 +147,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);
}

gpr.BindToRegister(dest, false, true);
@@ -156,13 +161,17 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}

gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}

void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);

ARM64Reg RS = gpr.R(value);

@@ -258,6 +267,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;

u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@@ -290,10 +301,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s

js.fifoBytesSinceCheck += accessSize >> 3;
}
else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0));
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, RS, XA, regs_in_use, fprs_in_use);
}
else if (mmio_address)
{
@@ -303,7 +314,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use);
}

if (update && !early_update)
@@ -314,6 +325,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}

gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}

FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@@ -496,6 +509,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16;

gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);

// MMU games make use of a >= d despite this being invalid according to the PEM.
// Because of this, make sure to not re-read rA after starting doing the loads.
@@ -521,13 +536,15 @@ void JitArm64::lmw(UGeckoInstruction inst)

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;

EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, EncodeRegTo64(addr_reg),
regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);

gpr.BindToRegister(i, false, true);
ASSERT(dest_reg == gpr.R(i));
@@ -537,6 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
}

gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}

void JitArm64::stmw(UGeckoInstruction inst)
@@ -548,6 +567,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16;

gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);

ARM64Reg addr_reg = ARM64Reg::W1;
if (a)
@@ -571,17 +592,21 @@ void JitArm64::stmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0;

EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, src_reg, EncodeRegTo64(addr_reg),
regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);

if (i != 31)
ADD(addr_reg, addr_reg, 4);
}

gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}

void JitArm64::dcbx(UGeckoInstruction inst)
@@ -770,8 +795,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB;

gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);

Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); });
Common::ScopeGuard register_guard([&] {
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
});

constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg temp_reg = ARM64Reg::W30;
@@ -838,8 +869,10 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;

EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, jo.fastmem, jo.fastmem, ARM64Reg::W0,
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);

if (using_dcbz_hack)
@@ -79,6 +79,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)

gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);

const ARM64Reg VD = fpr.RW(inst.FD, type, false);
ARM64Reg addr_reg = ARM64Reg::W0;
@@ -166,17 +168,19 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0;

if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{
EmitBackpatchRoutine(flags, true, false, VD, XA, BitSet32(0), BitSet32(0));
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, VD, XA, regs_in_use, fprs_in_use);
}
else
{
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use);
}

const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
@@ -190,6 +194,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)

gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}

void JitArm64::stfXX(UGeckoInstruction inst)
@@ -273,6 +279,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
}

gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);

ARM64Reg addr_reg = ARM64Reg::W1;

@@ -364,6 +372,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;

if (is_immediate)
@@ -389,21 +399,21 @@ void JitArm64::stfXX(UGeckoInstruction inst)
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3;
}
else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr))
else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0));
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, V0, XA, regs_in_use, fprs_in_use);
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSafe, V0, XA, regs_in_use, fprs_in_use);
}
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use);
}

if (update && !early_update)
@@ -418,4 +428,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)

gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}
@@ -44,6 +44,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1);
}
else if (!jo.fastmem_arena)
{
gpr.Lock(ARM64Reg::W2);
}

constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
@@ -82,6 +86,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
// Wipe the registers we are using as temporaries
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!jo.fastmem_arena)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0;
@@ -90,7 +96,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
if (!w)
flags |= BackPatchInfo::FLAG_PAIR;

EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use,
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use);
}
else
@@ -130,6 +136,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1);
}
else if (!jo.fastmem_arena)
{
gpr.Unlock(ARM64Reg::W2);
}
}

void JitArm64::psq_stXX(UGeckoInstruction inst)
@@ -189,8 +199,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}

gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
gpr.Lock(ARM64Reg::W3);

constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@@ -229,12 +241,14 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!jo.fastmem_arena)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;

u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
flags |= BackPatchInfo::FLAG_PAIR;

EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use,
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use);
}
else
@@ -261,9 +275,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)

gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
gpr.Lock(ARM64Reg::W3);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W2);
fpr.Unlock(ARM64Reg::Q1);
}
}
@@ -93,10 +93,10 @@ void JitArm64::GenerateAsm()
// set the mem_base based on MSR flags
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG, Memory::physical_base);
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
FixupBranch membaseend = B();
SetJumpTarget(physmem);
MOVP2R(MEM_REG, Memory::logical_base);
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
SetJumpTarget(membaseend);

// iCache[(address >> 2) & iCache_Mask];
@@ -141,10 +141,10 @@ void JitArm64::GenerateAsm()
// set the mem_base based on MSR flags and jump to next block.
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG, Memory::physical_base);
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
BR(ARM64Reg::X0);
SetJumpTarget(physmem);
MOVP2R(MEM_REG, Memory::logical_base);
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
BR(ARM64Reg::X0);

// Call JIT
@@ -510,7 +510,7 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1}, fprs_to_push, true);

RET(ARM64Reg::X30);
@@ -520,8 +520,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -538,8 +538,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -556,8 +556,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -573,8 +573,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -591,7 +591,7 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1}, fprs_to_push, true);

RET(ARM64Reg::X30);
@@ -601,8 +601,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -619,8 +619,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -637,8 +637,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -654,8 +654,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -699,6 +699,7 @@ void JitArm64::GenerateQuantizedStores()
// X0 is the scale
// X1 is the address
// X2 is a temporary
// X3 is a temporary if jo.fastmem_arena is false (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the register
// Q1 is a temporary
@@ -707,6 +708,8 @@ void JitArm64::GenerateQuantizedStores()
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1};
if (!jo.fastmem_arena)
gprs_to_push &= ~BitSet32{3};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this);

@@ -718,8 +721,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -737,8 +740,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -756,8 +759,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -774,8 +777,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -792,8 +795,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -803,8 +806,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -822,8 +825,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -841,8 +844,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -859,8 +862,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -877,8 +880,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;

EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
gprs_to_push, fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);

RET(ARM64Reg::X30);
}
@@ -197,11 +197,12 @@ TranslateResult JitCache_TranslateAddress(u32 address);

constexpr int BAT_INDEX_SHIFT = 17;
constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
constexpr u32 BAT_PAGE_COUNT = 1 << (32 - BAT_INDEX_SHIFT);
constexpr u32 BAT_MAPPED_BIT = 0x1;
constexpr u32 BAT_PHYSICAL_BIT = 0x2;
constexpr u32 BAT_WI_BIT = 0x4;
constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>; // 128 KB
using BatTable = std::array<u32, BAT_PAGE_COUNT>; // 128 KB
extern BatTable ibat_table;
extern BatTable dbat_table;
inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)