94 changes: 64 additions & 30 deletions Source/Core/Core/PowerPC/JitArm64/Jit.cpp
Expand Up @@ -187,7 +187,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);

if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
// also flush the program counter
ARM64Reg WA = gpr.GetReg();
Expand All @@ -207,7 +207,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
fpr.ResetRegisters(js.op->GetFregsOut());
gpr.ResetCRRegisters(js.op->crOut);

if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
if (js.isLastInstruction)
{
Expand Down Expand Up @@ -276,8 +276,7 @@ void JitArm64::Cleanup()
SetJumpTarget(exit);
}

// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
if (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON)
{
ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
js.numFloatingPointInst, &m_ppc_state);
Expand Down Expand Up @@ -348,27 +347,61 @@ void JitArm64::EmitUpdateMembase()
LDR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
}

void JitArm64::EmitStoreMembase(u32 msr)
// Emits code that refreshes the cached JIT state derived from a compile-time-known MSR value:
// the mem_ptr used by memory accesses and the feature_flags used for block lookup.
void JitArm64::MSRUpdated(u32 msr)
{
  // Update mem_ptr: select the logical (translated) or physical base depending on MSR.DR,
  // and the fastmem arena vs. the page-mappings table depending on whether fastmem is enabled.
  auto& memory = m_system.GetMemory();
  MOVP2R(MEM_REG,
         UReg_MSR(msr).DR ?
             (jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()) :
             (jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()));
  STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));

  // Update feature_flags. The static_asserts pin the bit layout this relies on:
  // MSR.DR/IR live at bits 4-5 and map directly onto feature flag bits 0-1,
  // so a shift-and-mask converts one to the other.
  static_assert(UReg_MSR{}.DR.StartBit() == 4);
  static_assert(UReg_MSR{}.IR.StartBit() == 5);
  static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
  static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
  // Keep any flag bits that are not derived from MSR (e.g. the perfmon bit).
  const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3;
  const u32 feature_flags = other_feature_flags | ((msr >> 4) & 0x3);
  if (feature_flags == 0)
  {
    // Store the zero register directly; no temporary register needed.
    STR(IndexType::Unsigned, ARM64Reg::WZR, PPC_REG, PPCSTATE_OFF(feature_flags));
  }
  else
  {
    ARM64Reg WA = gpr.GetReg();
    MOVI2R(WA, feature_flags);
    STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
    gpr.Unlock(WA);
  }
}

void JitArm64::EmitStoreMembase(const ARM64Reg& msr)
// Emits code that refreshes the cached JIT state derived from a run-time MSR value held in a
// register: the mem_ptr used by memory accesses and the feature_flags used for block lookup.
void JitArm64::MSRUpdated(ARM64Reg msr)
{
  ARM64Reg WA = gpr.GetReg();
  ARM64Reg XA = EncodeRegTo64(WA);

  // Update mem_ptr: load both candidate bases, then select based on MSR.DR.
  // 1 << (31 - 27) is the DR bit (PowerPC numbers bits from the MSB).
  auto& memory = m_system.GetMemory();
  MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
  MOVP2R(XA, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
  TST(msr, LogicalImm(1 << (31 - 27), 32));
  CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ);
  STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));

  // Update feature_flags. The static_asserts pin the bit layout this relies on:
  // MSR.DR/IR live at bits 4-5 and map directly onto feature flag bits 0-1.
  static_assert(UReg_MSR{}.DR.StartBit() == 4);
  static_assert(UReg_MSR{}.IR.StartBit() == 5);
  static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
  static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
  // Keep any flag bits that are not derived from MSR (e.g. the perfmon bit).
  const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3;
  UBFX(WA, msr, 4, 2);
  if (other_feature_flags != 0)
  {
    // BUGFIX: LogicalImm takes (value, size) — the arguments were swapped here,
    // which ORed in the constant 32 instead of the preserved feature flag bits.
    // Matches the LogicalImm(value, 32) usage elsewhere in this file.
    ORR(WA, WA, LogicalImm(other_feature_flags, 32));
  }
  STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));

  gpr.Unlock(WA);
}

void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,
Expand All @@ -383,20 +416,20 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
const u8* host_address_after_return;
if (LK)
{
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
// Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return);
MOVI2R(ARM64Reg::X1, feature_flags << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
else if (feature_flags == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32,
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
ARM64Reg::X1);
}
constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
Expand Down Expand Up @@ -487,20 +520,20 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
}
else
{
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
// Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return);
MOVI2R(ARM64Reg::X1, feature_flags << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
else if (feature_flags == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32,
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
ARM64Reg::X1);
}
constexpr s32 adr_offset = sizeof(u32) * 3;
Expand Down Expand Up @@ -558,25 +591,26 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a
// function has been called!
gpr.Lock(ARM64Reg::W30);
}
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
// Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg after_reg = ARM64Reg::INVALID_REG;
ARM64Reg reg_to_push;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
after_reg = gpr.GetReg();
reg_to_push = EncodeRegTo64(after_reg);
MOVI2R(reg_to_push, msr_bits << 32 | exit_address_after_return);
MOVI2R(reg_to_push, feature_flags << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
else if (feature_flags == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
after_reg = gpr.GetReg();
reg_to_push = EncodeRegTo64(after_reg);
ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, reg_to_push);
ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
reg_to_push);
}
ARM64Reg code_reg = gpr.GetReg();
constexpr s32 adr_offset = sizeof(u32) * 3;
Expand Down Expand Up @@ -640,16 +674,16 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest)
Cleanup();
EndTimeProfile(js.curBlock);

// Check if {PPC_PC, MSR_BITS} matches the current state, then RET to ARM_PC.
// Check if {PPC_PC, feature_flags} matches the current state, then RET to ARM_PC.
LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16);
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (msr_bits == 0)
const u64 feature_flags = m_ppc_state.feature_flags;
if (feature_flags == 0)
{
CMP(ARM64Reg::X1, EncodeRegTo64(DISPATCHER_PC));
}
else
{
ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), msr_bits << 32, ARM64Reg::X0);
ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), feature_flags << 32, ARM64Reg::X0);
CMP(ARM64Reg::X1, ARM64Reg::X0);
}
FixupBranch no_match = B(CC_NEQ);
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/Core/PowerPC/JitArm64/Jit.h
Expand Up @@ -310,8 +310,8 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
void EndTimeProfile(JitBlock* b);

void EmitUpdateMembase();
void EmitStoreMembase(u32 msr);
void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr);
void MSRUpdated(u32 msr);
void MSRUpdated(Arm64Gen::ARM64Reg msr);

// Exits
void
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
Expand Up @@ -64,11 +64,11 @@ void JitArm64::rfi(UGeckoInstruction inst)
ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1

STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA
gpr.Unlock(WB, WC);

EmitStoreMembase(WA);
MSRUpdated(WA);

LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR0));
gpr.Unlock(WB, WC);

WriteExceptionExit(WA);
gpr.Unlock(WA);
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
Expand Up @@ -727,7 +727,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
// Translate effective address to physical address.
const u8* loop_start = GetCodePtr();
FixupBranch bat_lookup_failed;
if (m_ppc_state.msr.IR)
if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
{
bat_lookup_failed =
BATAddressLookup(physical_addr, effective_addr, WA, m_mmu.GetIBATTable().data());
Expand Down Expand Up @@ -756,7 +756,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)

SwitchToFarCode();
SetJumpTarget(invalidate_needed);
if (m_ppc_state.msr.IR)
if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
SetJumpTarget(bat_lookup_failed);

BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
Expand Down
Expand Up @@ -23,7 +23,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);

// If fastmem is enabled, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR);
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem &&
!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));

// X30 is LR
// X0 is the address
Expand Down Expand Up @@ -151,7 +152,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);

// If fastmem is enabled, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR);
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem &&
!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));

// X30 is LR
// X0 contains the scale
Expand Down
Expand Up @@ -94,12 +94,12 @@ void JitArm64::mtmsr(UGeckoInstruction inst)

const bool imm_value = gpr.IsImm(inst.RS);
if (imm_value)
EmitStoreMembase(gpr.GetImm(inst.RS));
MSRUpdated(gpr.GetImm(inst.RS));

STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr));

if (!imm_value)
EmitStoreMembase(gpr.R(inst.RS));
MSRUpdated(gpr.R(inst.RS));

gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
Expand Down
40 changes: 22 additions & 18 deletions Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
Expand Up @@ -100,15 +100,20 @@ void JitArm64::GenerateAsm()
if (GetBlockCache()->GetEntryPoints())
{
// Check if there is a block
ARM64Reg pc_and_msr = ARM64Reg::X8;
ARM64Reg cache_base = ARM64Reg::X9;
ARM64Reg block = ARM64Reg::X10;
LDR(IndexType::Unsigned, EncodeRegTo32(pc_and_msr), PPC_REG, PPCSTATE_OFF(msr));
ARM64Reg feature_flags = ARM64Reg::W8;
ARM64Reg pc_and_feature_flags = ARM64Reg::X9;
ARM64Reg cache_base = ARM64Reg::X10;
ARM64Reg block = ARM64Reg::X11;

LDR(IndexType::Unsigned, feature_flags, PPC_REG, PPCSTATE_OFF(feature_flags));
MOVP2R(cache_base, GetBlockCache()->GetEntryPoints());
// The entry points map is indexed by ((msrBits << 26) | (address >> 2)).
UBFIZ(pc_and_msr, pc_and_msr, 26, 6);
BFXIL(pc_and_msr, EncodeRegTo64(DISPATCHER_PC), 2, 30);
LDR(block, cache_base, ArithOption(pc_and_msr, true));
// The entry points map is indexed by ((feature_flags << 30) | (pc >> 2)).
// The map contains 8-byte pointers and that means we need to shift feature_flags
// left by 33 bits and pc left by 1 bit to get the correct offset in the map.
LSL(pc_and_feature_flags, EncodeRegTo64(DISPATCHER_PC), 1);
BFI(pc_and_feature_flags, EncodeRegTo64(feature_flags), 33, 31);
LDR(block, cache_base, pc_and_feature_flags);

FixupBranch not_found = CBZ(block);
BR(block);
SetJumpTarget(not_found);
Expand All @@ -119,8 +124,8 @@ void JitArm64::GenerateAsm()
ARM64Reg cache_base = ARM64Reg::X9;
ARM64Reg block = ARM64Reg::X10;
ARM64Reg pc = ARM64Reg::W11;
ARM64Reg msr = ARM64Reg::W12;
ARM64Reg msr2 = ARM64Reg::W13;
ARM64Reg feature_flags = ARM64Reg::W12;
ARM64Reg feature_flags_2 = ARM64Reg::W13;
ARM64Reg entry = ARM64Reg::X14;

// iCache[(address >> 2) & iCache_Mask];
Expand All @@ -130,25 +135,24 @@ void JitArm64::GenerateAsm()
LDR(block, cache_base, ArithOption(EncodeRegTo64(pc_masked), true));
FixupBranch not_found = CBZ(block);

// b.effectiveAddress != addr || b.msrBits != msr
static_assert(offsetof(JitBlockData, msrBits) + 4 ==
// b.effectiveAddress != addr || b.feature_flags != feature_flags
static_assert(offsetof(JitBlockData, feature_flags) + 4 ==
offsetof(JitBlockData, effectiveAddress));
LDP(IndexType::Signed, msr, pc, block, offsetof(JitBlockData, msrBits));
LDR(IndexType::Unsigned, msr2, PPC_REG, PPCSTATE_OFF(msr));
LDP(IndexType::Signed, feature_flags, pc, block, offsetof(JitBlockData, feature_flags));
LDR(IndexType::Unsigned, feature_flags_2, PPC_REG, PPCSTATE_OFF(feature_flags));
CMP(pc, DISPATCHER_PC);
FixupBranch pc_mismatch = B(CC_NEQ);

LDR(IndexType::Unsigned, entry, block, offsetof(JitBlockData, normalEntry));
AND(msr2, msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
CMP(msr, msr2);
FixupBranch msr_mismatch = B(CC_NEQ);
CMP(feature_flags, feature_flags_2);
FixupBranch feature_flags_mismatch = B(CC_NEQ);

// return blocks[block_num].normalEntry;
BR(entry);

SetJumpTarget(not_found);
SetJumpTarget(pc_mismatch);
SetJumpTarget(msr_mismatch);
SetJumpTarget(feature_flags_mismatch);
}
}

Expand Down
37 changes: 18 additions & 19 deletions Source/Core/Core/PowerPC/JitCommon/JitCache.cpp
Expand Up @@ -110,7 +110,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
JitBlock& b = block_map.emplace(physical_address, JitBlock())->second;
b.effectiveAddress = em_address;
b.physicalAddress = physical_address;
b.msrBits = m_jit.m_ppc_state.msr.Hex & JIT_CACHE_MSR_MASK;
b.feature_flags = m_jit.m_ppc_state.feature_flags;
b.linkData.clear();
b.fast_block_map_index = 0;
return &b;
Expand All @@ -119,7 +119,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
const std::set<u32>& physical_addresses)
{
size_t index = FastLookupIndexForAddress(block.effectiveAddress, block.msrBits);
size_t index = FastLookupIndexForAddress(block.effectiveAddress, block.feature_flags);
if (m_entry_points_ptr)
m_entry_points_ptr[index] = block.normalEntry;
else
Expand Down Expand Up @@ -159,10 +159,10 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
}
}

JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, CPUEmuFeatureFlags feature_flags)
{
u32 translated_addr = addr;
if (UReg_MSR(msr).IR)
if (feature_flags & FEATURE_FLAG_MSR_IR)
{
auto translated = m_jit.m_mmu.JitCache_TranslateAddress(addr);
if (!translated.valid)
Expand All @@ -176,7 +176,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
for (; iter.first != iter.second; iter.first++)
{
JitBlock& b = iter.first->second;
if (b.effectiveAddress == addr && b.msrBits == (msr & JIT_CACHE_MSR_MASK))
if (b.effectiveAddress == addr && b.feature_flags == feature_flags)
return &b;
}

Expand All @@ -189,15 +189,14 @@ const u8* JitBaseBlockCache::Dispatch()
if (m_entry_points_ptr)
{
u8* entry_point =
m_entry_points_ptr[FastLookupIndexForAddress(ppc_state.pc, ppc_state.msr.Hex)];
m_entry_points_ptr[FastLookupIndexForAddress(ppc_state.pc, ppc_state.feature_flags)];
if (entry_point)
{
return entry_point;
}
else
{
JitBlock* block =
MoveBlockIntoFastCache(ppc_state.pc, ppc_state.msr.Hex & JIT_CACHE_MSR_MASK);
JitBlock* block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.feature_flags);

if (!block)
return nullptr;
Expand All @@ -207,12 +206,12 @@ const u8* JitBaseBlockCache::Dispatch()
}

JitBlock* block =
m_fast_block_map_fallback[FastLookupIndexForAddress(ppc_state.pc, ppc_state.msr.Hex)];
m_fast_block_map_fallback[FastLookupIndexForAddress(ppc_state.pc, ppc_state.feature_flags)];

if (!block || block->effectiveAddress != ppc_state.pc ||
block->msrBits != (ppc_state.msr.Hex & JIT_CACHE_MSR_MASK))
block->feature_flags != ppc_state.feature_flags)
{
block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.msr.Hex & JIT_CACHE_MSR_MASK);
block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.feature_flags);
}

if (!block)
Expand Down Expand Up @@ -374,7 +373,7 @@ void JitBaseBlockCache::LinkBlockExits(JitBlock& block)
{
if (!e.linkStatus)
{
JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.msrBits);
JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.feature_flags);
if (destinationBlock)
{
WriteLinkBlock(e, destinationBlock);
Expand All @@ -393,7 +392,7 @@ void JitBaseBlockCache::LinkBlock(JitBlock& block)

for (JitBlock* b2 : it->second)
{
if (block.msrBits == b2->msrBits)
if (block.feature_flags == b2->feature_flags)
LinkBlockExits(*b2);
}
}
Expand All @@ -412,7 +411,7 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block)
return;
for (JitBlock* sourceBlock : it->second)
{
if (sourceBlock->msrBits != block.msrBits)
if (sourceBlock->feature_flags != block.feature_flags)
continue;

for (auto& e : sourceBlock->linkData)
Expand Down Expand Up @@ -460,9 +459,9 @@ void JitBaseBlockCache::DestroyBlock(JitBlock& block)
WriteDestroyBlock(block);
}

JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, CPUEmuFeatureFlags feature_flags)
{
JitBlock* block = GetBlockFromStartAddress(addr, msr);
JitBlock* block = GetBlockFromStartAddress(addr, feature_flags);

if (!block)
return nullptr;
Expand All @@ -484,7 +483,7 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
}

// And create a new one
size_t index = FastLookupIndexForAddress(addr, msr);
size_t index = FastLookupIndexForAddress(addr, feature_flags);
if (m_entry_points_ptr)
m_entry_points_ptr[index] = block->normalEntry;
else
Expand All @@ -494,11 +493,11 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
return block;
}

size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address, u32 msr)
size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address, u32 feature_flags)
{
if (m_entry_points_ptr)
{
return ((msr & JIT_CACHE_MSR_MASK) << 26) | (address >> 2);
return (feature_flags << 30) | (address >> 2);
}
else
{
Expand Down
23 changes: 10 additions & 13 deletions Source/Core/Core/PowerPC/JitCommon/JitCache.h
Expand Up @@ -17,6 +17,7 @@

#include "Common/CommonTypes.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/Gekko.h"

class JitBase;

Expand All @@ -33,8 +34,8 @@ struct JitBlockData
// The normal entry point for the block, returned by Dispatch().
u8* normalEntry;

// The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK.
u32 msrBits;
// The features that this block was compiled with support for.
CPUEmuFeatureFlags feature_flags;
// The effective address (PC) for the beginning of the block.
u32 effectiveAddress;
// The physical address of the code represented by this block.
Expand All @@ -48,8 +49,8 @@ struct JitBlockData
// The number of PPC instructions represented by this block. Mostly
// useful for logging.
u32 originalSize;
// This tracks the position if this block within the fast block cache.
// We allow each block to have only one map entry.
// This tracks the position of this block within the fast block cache.
// We only allow each block to have one map entry.
size_t fast_block_map_index;
};
static_assert(std::is_standard_layout_v<JitBlockData>, "JitBlockData must have a standard layout");
Expand Down Expand Up @@ -128,13 +129,9 @@ class ValidBlockBitSet final
class JitBaseBlockCache
{
public:
// Mask for the MSR bits which determine whether a compiled block
// is valid (MSR.IR and MSR.DR, the address translation bits).
static constexpr u32 JIT_CACHE_MSR_MASK = 0x30;

// The value for the map is determined like this:
// ((4 GB guest memory space) / (4 bytes per address) * sizeof(JitBlock*)) * (4 for 2 bits of msr)
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x8'0000'0000;
// The size of the fast map is determined like this:
// ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (3 feature flag bits)
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x10'0000'0000;
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000;
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1;

Expand All @@ -157,7 +154,7 @@ class JitBaseBlockCache
// Look for the block in the slow but accurate way.
// This function shall be used if FastLookupIndexForAddress() failed.
// This might return nullptr if there is no such block.
JitBlock* GetBlockFromStartAddress(u32 em_address, u32 msr);
JitBlock* GetBlockFromStartAddress(u32 em_address, CPUEmuFeatureFlags feature_flags);

// Get the normal entry for the block associated with the current program
// counter. This will JIT code if necessary. (This is the reference
Expand Down Expand Up @@ -185,7 +182,7 @@ class JitBaseBlockCache
void UnlinkBlock(const JitBlock& block);
void InvalidateICacheInternal(u32 physical_address, u32 address, u32 length, bool forced);

JitBlock* MoveBlockIntoFastCache(u32 em_address, u32 msr);
JitBlock* MoveBlockIntoFastCache(u32 em_address, CPUEmuFeatureFlags feature_flags);

// Fast but risky block lookup based on fast_block_map.
size_t FastLookupIndexForAddress(u32 address, u32 msr);
Expand Down
6 changes: 4 additions & 2 deletions Source/Core/Core/PowerPC/JitInterface.cpp
Expand Up @@ -187,12 +187,14 @@ JitInterface::GetHostCode(u32 address) const
}

auto& ppc_state = m_system.GetPPCState();
JitBlock* block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address, ppc_state.msr.Hex);
JitBlock* block =
m_jit->GetBlockCache()->GetBlockFromStartAddress(address, ppc_state.feature_flags);
if (!block)
{
for (int i = 0; i < 500; i++)
{
block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address - 4 * i, ppc_state.msr.Hex);
block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address - 4 * i,
ppc_state.feature_flags);
if (block)
break;
}
Expand Down
27 changes: 23 additions & 4 deletions Source/Core/Core/PowerPC/PPCAnalyst.cpp
Expand Up @@ -202,6 +202,23 @@ static void AnalyzeFunction2(Common::Symbol* func)
func->flags = flags;
}

// Returns true if the instruction is mtspr (primary opcode 31, extended opcode 467).
static bool IsMtspr(UGeckoInstruction inst)
{
  if (inst.OPCD != 31)
    return false;
  return inst.SUBOP10 == 467;
}

// Returns true if the SPR-accessing instruction targets MMCR0 or MMCR1.
static bool IsSprInstructionUsingMmcr(UGeckoInstruction inst)
{
  // The SPR number is encoded with its halves swapped: SPRU holds the upper 5 bits.
  const u32 spr_index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
  switch (spr_index)
  {
  case SPR_MMCR0:
  case SPR_MMCR1:
    return true;
  default:
    return false;
  }
}

// Returns true if this op may end a JIT block. mtspr carries FL_ENDBLOCK, but it only
// needs to end the block when it writes to MMCR0/MMCR1.
static bool InstructionCanEndBlock(const CodeOp& op)
{
  if (!(op.opinfo->flags & FL_ENDBLOCK))
    return false;
  if (IsMtspr(op.inst))
    return IsSprInstructionUsingMmcr(op.inst);
  return true;
}

bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
{
const GekkoOPInfo* a_info = a.opinfo;
Expand All @@ -222,9 +239,11 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
// [1] https://bugs.dolphin-emu.org/issues/5864#note-7
if (a.canCauseException || b.canCauseException)
return false;
if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
if (a.canEndBlock || b.canEndBlock)
return false;
if (a_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
return false;
if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
if (b_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
return false;
if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
return false;
Expand Down Expand Up @@ -597,7 +616,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code,

code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0;
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0;
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0;
code->canEndBlock = InstructionCanEndBlock(*code);

code->canCauseException = first_fpu_instruction ||
(opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 ||
Expand Down Expand Up @@ -935,7 +954,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
{
// Just pick the next instruction
address += 4;
if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) // right now we stop early
if (!conditional_continue && InstructionCanEndBlock(code[i])) // right now we stop early
{
found_exit = true;
break;
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/Core/PowerPC/PPCTables.cpp
Expand Up @@ -374,7 +374,7 @@ constexpr std::array<GekkoOPTemplate, 107> s_table31{{
{210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION},
{242, "mtsrin", OpType::System, 1, FL_IN_SB | FL_PROGRAMEXCEPTION},
{339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION},
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_PROGRAMEXCEPTION},
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION},
{371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION},
{512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA},
{595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION},
Expand Down
41 changes: 37 additions & 4 deletions Source/Core/Core/PowerPC/PowerPC.cpp
Expand Up @@ -137,6 +137,7 @@ void PowerPCManager::DoState(PointerWrap& p)
}

RoundingModeUpdated(m_ppc_state);
RecalculateAllFeatureFlags(m_ppc_state);

auto& mmu = m_system.GetMMU();
mmu.IBATUpdated();
Expand Down Expand Up @@ -194,8 +195,6 @@ void PowerPCManager::ResetRegisters()
}
m_ppc_state.SetXER({});

RoundingModeUpdated(m_ppc_state);

auto& mmu = m_system.GetMMU();
mmu.DBATUpdated();
mmu.IBATUpdated();
Expand All @@ -208,6 +207,9 @@ void PowerPCManager::ResetRegisters()
m_ppc_state.msr.Hex = 0;
m_ppc_state.spr[SPR_DEC] = 0xFFFFFFFF;
SystemTimers::DecrementerSet();

RoundingModeUpdated(m_ppc_state);
RecalculateAllFeatureFlags(m_ppc_state);
}

void PowerPCManager::InitializeCPUCore(CPUCore cpu_core)
Expand Down Expand Up @@ -581,15 +583,15 @@ void PowerPCManager::CheckExceptions()
DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT");
m_ppc_state.Exceptions &= ~EXCEPTION_ALIGNMENT;
}

// EXTERNAL INTERRUPT
else
{
// EXTERNAL INTERRUPT
CheckExternalExceptions();
return;
}

m_system.GetJitInterface().UpdateMembase();
MSRUpdated(m_ppc_state);
}

void PowerPCManager::CheckExternalExceptions()
Expand Down Expand Up @@ -642,6 +644,7 @@ void PowerPCManager::CheckExternalExceptions()
ERROR_LOG_FMT(POWERPC, "Unknown EXTERNAL INTERRUPT exception: Exceptions == {:08x}",
exceptions);
}
MSRUpdated(m_ppc_state);
}

m_system.GetJitInterface().UpdateMembase();
Expand Down Expand Up @@ -700,6 +703,36 @@ void RoundingModeUpdated(PowerPCState& ppc_state)
Common::FPU::SetSIMDMode(ppc_state.fpscr.RN, ppc_state.fpscr.NI);
}

// Rederives the MSR-dependent feature flags (DR/IR) from ppc_state.msr while leaving the
// perfmon flag untouched. Call after any write to MSR outside the JIT's own emitted code.
void MSRUpdated(PowerPCState& ppc_state)
{
  // MSR.DR/IR occupy bits 4-5 and map directly onto feature flag bits 0-1.
  static_assert(UReg_MSR{}.DR.StartBit() == 4);
  static_assert(UReg_MSR{}.IR.StartBit() == 5);
  static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
  static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);

  const u32 translation_bits = (ppc_state.msr.Hex >> 4) & 0x3;
  const u32 preserved_bits = ppc_state.feature_flags & FEATURE_FLAG_PERFMON;
  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(preserved_bits | translation_bits);
}

// Rederives the perfmon feature flag from the MMCR SPRs while leaving the MSR-derived
// flags untouched. Call after any write to MMCR0/MMCR1.
void MMCRUpdated(PowerPCState& ppc_state)
{
  // Performance monitoring is considered active whenever either MMCR register is nonzero.
  const bool perfmon_active = ppc_state.spr[SPR_MMCR0] != 0 || ppc_state.spr[SPR_MMCR1] != 0;
  u32 flags = ppc_state.feature_flags & ~FEATURE_FLAG_PERFMON;
  if (perfmon_active)
    flags |= FEATURE_FLAG_PERFMON;
  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(flags);
}

// Recomputes every feature flag from scratch (used after loading a savestate or resetting
// registers, when no prior flag state can be trusted).
//
// MSRUpdated() derives the DR/IR bits (preserving the perfmon bit) and MMCRUpdated() then
// derives the perfmon bit (preserving the DR/IR bits), so composing them fully determines
// feature_flags without duplicating the bit-extraction logic a third time here.
void RecalculateAllFeatureFlags(PowerPCState& ppc_state)
{
  MSRUpdated(ppc_state);
  MMCRUpdated(ppc_state);
}

void CheckExceptionsFromJIT(PowerPCManager& power_pc)
{
power_pc.CheckExceptions();
Expand Down
5 changes: 5 additions & 0 deletions Source/Core/Core/PowerPC/PowerPC.h
Expand Up @@ -141,6 +141,8 @@ struct PowerPCState
UReg_MSR msr; // machine state register
UReg_FPSCR fpscr; // floating point flags/status bits

CPUEmuFeatureFlags feature_flags;

// Exception management.
u32 Exceptions = 0;

Expand Down Expand Up @@ -346,5 +348,8 @@ void CheckBreakPointsFromJIT(PowerPCManager& power_pc);
#define TU(ppc_state) (ppc_state).spr[SPR_TU]

void RoundingModeUpdated(PowerPCState& ppc_state);
void MSRUpdated(PowerPCState& ppc_state);
void MMCRUpdated(PowerPCState& ppc_state);
void RecalculateAllFeatureFlags(PowerPCState& ppc_state);

} // namespace PowerPC
5 changes: 4 additions & 1 deletion Source/Core/DolphinQt/Debugger/RegisterWidget.cpp
Expand Up @@ -448,7 +448,10 @@ void RegisterWidget::PopulateTable()
// MSR
AddRegister(
23, 5, RegisterType::msr, "MSR", [this] { return m_system.GetPPCState().msr.Hex; },
[this](u64 value) { m_system.GetPPCState().msr.Hex = value; });
[this](u64 value) {
m_system.GetPPCState().msr.Hex = value;
PowerPC::MSRUpdated(m_system.GetPPCState());
});

// SRR 0-1
AddRegister(
Expand Down