Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jit: Check MSR state in BLR optimization #12141

Merged
Merged 1 commit on Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 13 additions & 3 deletions Source/Core/Core/PowerPC/Jit64/Jit.cpp
Expand Up @@ -482,7 +482,8 @@ void Jit64::FakeBLCall(u32 after)

// We may need to fake the BLR stack on inlined CALL instructions.
// Else we can't return to this location any more.
MOV(32, R(RSCRATCH2), Imm32(after));
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2);
FixupBranch skip_exit = CALL();
POP(RSCRATCH2);
Expand Down Expand Up @@ -514,7 +515,8 @@ void Jit64::WriteExit(u32 destination, bool bl, u32 after)

if (bl)
{
MOV(32, R(RSCRATCH2), Imm32(after));
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2);
}

Expand Down Expand Up @@ -571,7 +573,8 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after)

if (bl)
{
MOV(32, R(RSCRATCH2), Imm32(after));
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2);
}

Expand Down Expand Up @@ -599,6 +602,13 @@ void Jit64::WriteBLRExit()
bool disturbed = Cleanup();
if (disturbed)
MOV(32, R(RSCRATCH), PPCSTATE(pc));
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (msr_bits != 0)
{
MOV(32, R(RSCRATCH2), Imm32(msr_bits));
SHL(64, R(RSCRATCH2), Imm8(32));
// Review note (lioncash): this conversation was marked as resolved.
OR(64, R(RSCRATCH), R(RSCRATCH2));
}
MOV(32, R(RSCRATCH2), Imm32(js.downcountAmount));
CMP(64, R(RSCRATCH), MDisp(RSP, 8));
J_CC(CC_NE, asm_routines.dispatcher_mispredicted_blr);
Expand Down
4 changes: 0 additions & 4 deletions Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
Expand Up @@ -445,10 +445,6 @@ void Jit64::mtmsr(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();

// Our jit cache also stores some MSR bits, as they have changed, we either
// have to validate them in the BLR/RET check, or just flush the stack here.
asm_routines.ResetStack(*this);

// If some exceptions are pending and EE are now enabled, force checking
// external exceptions when going out of mtmsr in order to execute delayed
// interrupts as soon as possible.
Expand Down
70 changes: 55 additions & 15 deletions Source/Core/Core/PowerPC/JitArm64/Jit.cpp
Expand Up @@ -386,12 +386,21 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
const u8* host_address_after_return;
if (LK)
{
// Push {ARM_PC; PPC_PC} on the stack
ARM64Reg reg_to_push = exit_address_after_return_reg;
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, exit_address_after_return);
reg_to_push = ARM64Reg::X1;
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32,
ARM64Reg::X1);
}
constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
host_address_after_return = GetCodePtr() + adr_offset;
Expand Down Expand Up @@ -481,14 +490,22 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
}
else
{
// Push {ARM_PC, PPC_PC} on the stack
ARM64Reg reg_to_push = exit_address_after_return_reg;
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, exit_address_after_return);
reg_to_push = ARM64Reg::X1;
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32,
ARM64Reg::X1);
}
MOVI2R(ARM64Reg::X1, exit_address_after_return);
constexpr s32 adr_offset = sizeof(u32) * 3;
const u8* host_address_after_return = GetCodePtr() + adr_offset;
ADR(ARM64Reg::X0, adr_offset);
Expand Down Expand Up @@ -544,19 +561,33 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a
// function has been called!
gpr.Lock(ARM64Reg::W30);
}
ARM64Reg after_reg = exit_address_after_return_reg;
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
ARM64Reg after_reg = ARM64Reg::INVALID_REG;
ARM64Reg reg_to_push;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
after_reg = gpr.GetReg();
MOVI2R(after_reg, exit_address_after_return);
reg_to_push = EncodeRegTo64(after_reg);
MOVI2R(reg_to_push, msr_bits << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
after_reg = gpr.GetReg();
reg_to_push = EncodeRegTo64(after_reg);
ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, reg_to_push);
}
ARM64Reg code_reg = gpr.GetReg();
constexpr s32 adr_offset = sizeof(u32) * 3;
const u8* host_address_after_return = GetCodePtr() + adr_offset;
ADR(EncodeRegTo64(code_reg), adr_offset);
STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16);
STP(IndexType::Pre, EncodeRegTo64(code_reg), reg_to_push, ARM64Reg::SP, -16);
gpr.Unlock(code_reg);
if (after_reg != exit_address_after_return_reg)
if (after_reg != ARM64Reg::INVALID_REG)
gpr.Unlock(after_reg);

FixupBranch skip_exit = BL();
Expand Down Expand Up @@ -612,9 +643,18 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest)
Cleanup();
EndTimeProfile(js.curBlock);

// Check if {ARM_PC, PPC_PC} matches the current state.
// Check if {PPC_PC, MSR_BITS} matches the current state, then RET to ARM_PC.
LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16);
CMP(ARM64Reg::W1, DISPATCHER_PC);
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (msr_bits == 0)
{
CMP(ARM64Reg::X1, EncodeRegTo64(DISPATCHER_PC));
}
else
{
ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), msr_bits << 32, ARM64Reg::X0);
CMP(ARM64Reg::X1, ARM64Reg::X0);
}
FixupBranch no_match = B(CC_NEQ);

DoDownCount(); // overwrites X0 + X1
Expand Down
Expand Up @@ -99,10 +99,6 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);

// Our jit cache also stores some MSR bits, as they have changed, we either
// have to validate them in the BLR/RET check, or just flush the stack here.
ResetStack();

WriteExceptionExit(js.compilerPC + 4, true);
}

Expand Down
2 changes: 1 addition & 1 deletion Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
Expand Up @@ -50,7 +50,7 @@ void JitArm64::GenerateAsm()
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));

// Push {nullptr; -1} as invalid destination on the stack.
MOVI2R(ARM64Reg::X0, 0xFFFFFFFF);
MOVI2R(ARM64Reg::X0, 0xFFFF'FFFF'FFFF'FFFF);
STP(IndexType::Pre, ARM64Reg::ZR, ARM64Reg::X0, ARM64Reg::SP, -16);

// The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance().
Expand Down