Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #9884 from JosJuice/jitarm64-paired-loadstore-addr
JitArm64: Improve psq_l/psq_st address checking
  • Loading branch information
JMC47 committed Oct 11, 2021
2 parents eee302c + 15eb561 commit 3bfb3fa
Show file tree
Hide file tree
Showing 12 changed files with 561 additions and 513 deletions.
2 changes: 2 additions & 0 deletions Source/Core/Common/Arm64Emitter.h
Expand Up @@ -1155,6 +1155,8 @@ class ARM64XEmitter
bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);

// ABI related
static constexpr BitSet32 CALLER_SAVED_GPRS = BitSet32(0x4007FFFF);
static constexpr BitSet32 CALLER_SAVED_FPRS = BitSet32(0xFFFF00FF);
void ABI_PushRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));

Expand Down
8 changes: 5 additions & 3 deletions Source/Core/Core/PowerPC/JitArm64/Jit.h
Expand Up @@ -225,17 +225,18 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
void DumpCode(const u8* start, const u8* end);

// Backpatching routines
bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg);
void EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, Arm64Gen::ARM64Reg RS,
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
BitSet32 fprs_to_push = BitSet32(0));
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
// Loadstore routines
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
// If lookup succeeds, writes upper 15 bits of physical address to addr_out. If not,
// jumps to the returned FixupBranch. Clobbers tmp and the 17 lower bits of addr_out.
Arm64Gen::FixupBranch BATAddressLookup(Arm64Gen::ARM64Reg addr_out, Arm64Gen::ARM64Reg addr_in,
Arm64Gen::ARM64Reg tmp, const void* bat_table);
Arm64Gen::FixupBranch CheckIfSafeAddress(Arm64Gen::ARM64Reg addr, Arm64Gen::ARM64Reg tmp1,
Arm64Gen::ARM64Reg tmp2);

void DoJit(u32 em_address, JitBlock* b, u32 nextPC);

Expand All @@ -253,7 +254,8 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
void GenerateConvertDoubleToSingle();
void GenerateConvertSingleToDouble();
void GenerateFPRF(bool single);
void GenerateQuantizedLoadStores();
void GenerateQuantizedLoads();
void GenerateQuantizedStores();

// Profiling
void BeginTimeProfile(JitBlock* b);
Expand Down
210 changes: 118 additions & 92 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
Expand Up @@ -3,12 +3,14 @@

#include <cinttypes>
#include <cstddef>
#include <optional>
#include <string>

#include "Common/BitSet.h"
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MathUtil.h"
#include "Common/StringUtil.h"
#include "Common/Swap.h"

Expand Down Expand Up @@ -51,48 +53,41 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
}

void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, ARM64Reg RS,
ARM64Reg addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push)
ARM64Reg addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push,
bool emitting_routine)
{
bool in_far_code = false;
const u8* fastmem_start = GetCodePtr();
std::optional<FixupBranch> slowmem_fixup;

if (fastmem)
{
if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT)
if (do_farcode && emitting_routine)
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
m_float_emit.REV32(8, ARM64Reg::D0, RS);
m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
{
m_float_emit.REV32(8, ARM64Reg::D0, RS);
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
}
else
{
m_float_emit.REV64(8, ARM64Reg::Q0, RS);
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
}
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
const ARM64Reg temp2 = ARM64Reg::W2;

slowmem_fixup = CheckIfSafeAddress(addr, temp1, temp2);
}
else if (flags & BackPatchInfo::FLAG_LOAD && flags & BackPatchInfo::FLAG_MASK_FLOAT)

if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
m_float_emit.LDR(32, EncodeRegToDouble(RS), MEM_REG, addr);
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
}
else
{
m_float_emit.LDR(64, EncodeRegToDouble(RS), MEM_REG, addr);
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
}
ARM64Reg temp = ARM64Reg::D0;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);

m_float_emit.STR(BackPatchInfo::GetFlagSize(flags), temp, MEM_REG, addr);
}
else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
{
m_float_emit.LDR(BackPatchInfo::GetFlagSize(flags), EncodeRegToDouble(RS), MEM_REG, addr);

ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
true, false);
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
ARM64Reg temp = ARM64Reg::W0;
temp = ByteswapBeforeStore(this, temp, RS, flags, true);
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);

if (flags & BackPatchInfo::FLAG_SIZE_32)
STR(temp, MEM_REG, addr);
Expand All @@ -118,7 +113,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
else if (flags & BackPatchInfo::FLAG_SIZE_8)
LDRB(RS, MEM_REG, addr);

ByteswapAfterLoad(this, RS, RS, flags, true, false);
ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
}
}
const u8* fastmem_end = GetCodePtr();
Expand All @@ -127,83 +122,81 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
{
if (fastmem && do_farcode)
{
SlowmemHandler handler;
handler.dest_reg = RS;
handler.addr_reg = addr;
handler.gprs = gprs_to_push;
handler.fprs = fprs_to_push;
handler.flags = flags;

FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
auto handler_loc_iter = m_handler_to_loc.find(handler);

if (handler_loc_iter == m_handler_to_loc.end())
if (emitting_routine)
{
in_far_code = true;
SwitchToFarCode();
const u8* handler_loc = GetCodePtr();
m_handler_to_loc[handler] = handler_loc;
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = handler_loc;
}
else
{
const u8* handler_loc = handler_loc_iter->second;
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = handler_loc;
return;
SlowmemHandler handler;
handler.dest_reg = RS;
handler.addr_reg = addr;
handler.gprs = gprs_to_push;
handler.fprs = fprs_to_push;
handler.flags = flags;

FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
auto handler_loc_iter = m_handler_to_loc.find(handler);

if (handler_loc_iter == m_handler_to_loc.end())
{
in_far_code = true;
SwitchToFarCode();
const u8* handler_loc = GetCodePtr();
m_handler_to_loc[handler] = handler_loc;
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = handler_loc;
}
else
{
const u8* handler_loc = handler_loc_iter->second;
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = handler_loc;
return;
}
}
}

if (slowmem_fixup)
SetJumpTarget(*slowmem_fixup);

ABI_PushRegisters(gprs_to_push);
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);

if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT)
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
m_float_emit.UMOV(32, ARM64Reg::W0, RS, 0);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
BLR(ARM64Reg::X8);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
{
m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
BLR(ARM64Reg::X8);
}
else
{
m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
BLR(ARM64Reg::X8);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD && flags & BackPatchInfo::FLAG_MASK_FLOAT)
if (flags & BackPatchInfo::FLAG_STORE)
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
ARM64Reg src_reg = RS;
const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X0 : ARM64Reg::W0;

if (flags & BackPatchInfo::FLAG_FLOAT)
{
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U32);
BLR(ARM64Reg::X8);
m_float_emit.INS(32, RS, 0, ARM64Reg::X0);
if (access_size == 64)
m_float_emit.FMOV(dst_reg, EncodeRegToDouble(RS));
else
m_float_emit.FMOV(dst_reg, EncodeRegToSingle(RS));

src_reg = dst_reg;
}
else

if (flags & BackPatchInfo::FLAG_PAIR)
{
MOVP2R(ARM64Reg::X8, &PowerPC::Read_F64);
BLR(ARM64Reg::X8);
m_float_emit.INS(64, RS, 0, ARM64Reg::X0);
// Compensate for the Write_ functions swapping the whole write instead of each pair
SwapPairs(this, dst_reg, src_reg, flags);
src_reg = dst_reg;
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
MOV(ARM64Reg::W0, RS);

if (dst_reg != src_reg)
MOV(dst_reg, src_reg);

const bool reverse = (flags & BackPatchInfo::FLAG_REVERSE) != 0;

if (flags & BackPatchInfo::FLAG_SIZE_32)
if (access_size == 64)
MOVP2R(ARM64Reg::X8, reverse ? &PowerPC::Write_U64_Swap : &PowerPC::Write_U64);
else if (access_size == 32)
MOVP2R(ARM64Reg::X8, reverse ? &PowerPC::Write_U32_Swap : &PowerPC::Write_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
else if (access_size == 16)
MOVP2R(ARM64Reg::X8, reverse ? &PowerPC::Write_U16_Swap : &PowerPC::Write_U16);
else
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U8);
Expand All @@ -217,16 +210,40 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
const u32 access_size = BackPatchInfo::GetFlagSize(flags);

if (access_size == 64)
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U64);
else if (access_size == 32)
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
else if (access_size == 16)
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U16);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
else
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U8);

BLR(ARM64Reg::X8);

ByteswapAfterLoad(this, RS, ARM64Reg::W0, flags, false, false);
ARM64Reg src_reg = access_size == 64 ? ARM64Reg::X0 : ARM64Reg::W0;

if (flags & BackPatchInfo::FLAG_PAIR)
{
// Compensate for the Read_ functions swapping the whole read instead of each pair
const ARM64Reg dst_reg = flags & BackPatchInfo::FLAG_FLOAT ? src_reg : RS;
SwapPairs(this, dst_reg, src_reg, flags);
src_reg = dst_reg;
}

if (flags & BackPatchInfo::FLAG_FLOAT)
{
if (access_size == 64)
m_float_emit.FMOV(EncodeRegToDouble(RS), src_reg);
else
m_float_emit.FMOV(EncodeRegToSingle(RS), src_reg);

src_reg = RS;
}

ByteswapAfterLoad(this, &m_float_emit, RS, src_reg, flags, false, false);
}

m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
Expand All @@ -235,8 +252,17 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR

if (in_far_code)
{
RET(ARM64Reg::X30);
SwitchToNearCode();
if (emitting_routine)
{
FixupBranch done = B();
SwitchToNearCode();
SetJumpTarget(done);
}
else
{
RET(ARM64Reg::X30);
SwitchToNearCode();
}
}
}

Expand Down
16 changes: 15 additions & 1 deletion Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
Expand Up @@ -240,7 +240,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));

ARM64Reg temp = ARM64Reg::W1;
temp = ByteswapBeforeStore(this, temp, RS, flags, true);
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);

if (accessSize == 32)
STR(IndexType::Post, temp, ARM64Reg::X0, 4);
Expand Down Expand Up @@ -288,6 +288,20 @@ FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM6
return fail;
}

// Emits a fast check of whether the guest address in `addr` is backed by a
// physical (fastmem-safe) mapping, by consulting the DBAT lookup table.
// Returns a FixupBranch that is taken when the address is NOT safe; the caller
// is expected to SetJumpTarget() it at the slow-path handler. Execution falls
// through when the address is safe. Clobbers tmp1 and tmp2 (and only reads addr).
FixupBranch JitArm64::CheckIfSafeAddress(Arm64Gen::ARM64Reg addr, Arm64Gen::ARM64Reg tmp1,
Arm64Gen::ARM64Reg tmp2)
{
// Use the 64-bit view of tmp2 so it can hold the host pointer to the table.
tmp2 = EncodeRegTo64(tmp2);

MOVP2R(tmp2, PowerPC::dbat_table.data());
// Index into dbat_table using the high bits of the guest address.
LSR(tmp1, addr, PowerPC::BAT_INDEX_SHIFT);
// Load the table entry for this address (tmp1 used as a shifted register
// offset — presumably scaled by the element size; confirm against ArithOption).
LDR(tmp1, tmp2, ArithOption(tmp1, true));
// TBNZ: branch if the BAT "physical" bit is set, i.e. the access is safe.
FixupBranch pass = TBNZ(tmp1, IntLog2(PowerPC::BAT_PHYSICAL_BIT));
// Otherwise fall into an unconditional branch that becomes the failure exit.
FixupBranch fail = B();
SetJumpTarget(pass);
return fail;
}

void JitArm64::lXX(UGeckoInstruction inst)
{
INSTRUCTION_START
Expand Down

0 comments on commit 3bfb3fa

Please sign in to comment.