Jit64: Avoid System::GetInstance() and ppcState.
AdmiralCurtiss committed Mar 21, 2023
1 parent 3e6886c commit 72eb8b0
Showing 17 changed files with 290 additions and 278 deletions.
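The recurring change across these files: code that reached global state through Core::System::GetInstance() and the PowerPC::ppcState global now goes through the Jit64 instance's m_system and m_ppc_state members, and the PPCSTATE family of macros takes the state object as an explicit first argument. A minimal sketch of the macro shape this implies, assuming (as the generated code in JitAsm.cpp shows) that RPPCSTATE holds &ppc_state + 0x80; the real definitions live in the Jit64Common headers and may differ:

// Before (sketch): field offset relative to the hard-coded global.
#define PPCSTATE(x) MDisp(RPPCSTATE, (s32)offsetof(PowerPC::PowerPCState, x) - 0x80)
// After (sketch): same displacement, but the call site names the instance,
// so the JIT no longer silently assumes which PowerPCState it compiles against.
#define PPCSTATE(ppc_state, x) MDisp(RPPCSTATE, (s32)offsetof(PowerPC::PowerPCState, x) - 0x80)

The displacement can stay offsetof-based because every instance shares one layout; what changes is that each call site documents which state it addresses.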
166 changes: 79 additions & 87 deletions Source/Core/Core/PowerPC/Jit64/Jit.cpp

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -260,6 +260,8 @@ class Jit64 : public JitBase, public QuantizedMemoryRoutines

void ResetFreeMemoryRanges();

static void ImHere(Jit64& jit);

JitBlockCache blocks{*this};
TrampolineCache trampolines{*this};

@@ -270,6 +272,10 @@ class Jit64 : public JitBase, public QuantizedMemoryRoutines

HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near;
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far;

const bool m_im_here_debug = false;
const bool m_im_here_log = false;
std::map<u32, int> m_been_here;
};
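ImHere and the m_im_here_*/m_been_here members replace file-static debug state that previously lived in Jit.cpp; as members, the tracing is per-instance and needs no globals. A hedged sketch of how such a hook typically behaves (the log channel and exact policy are assumptions, not taken from this diff):

void Jit64::ImHere(Jit64& jit)
{
  // Called from emitted code when m_im_here_debug is on: count how often each
  // guest PC is reached, optionally logging the first visit.
  const u32 pc = jit.m_ppc_state.pc;
  int& count = jit.m_been_here[pc];
  if (jit.m_im_here_log && count == 0)
    INFO_LOG_FMT(DYNA_REC, "I'm here: {:08x}", pc);  // assumed log macro/channel
  ++count;
}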

void LogGeneratedX86(size_t size, const PPCAnalyst::CodeBuffer& code_buffer, const u8* normalEntry,
32 changes: 17 additions & 15 deletions Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -20,7 +20,7 @@

using namespace Gen;

Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit), m_jit{jit}
Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit)
{
}

@@ -45,11 +45,13 @@ void Jit64AsmRoutineManager::Generate()
// waste a bit of space for a second shadow, but whatever.
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, /*frame*/ 16);

auto& ppc_state = m_jit.m_ppc_state;

// Two statically allocated registers.
// MOV(64, R(RMEM), Imm64((u64)Memory::physical_base));
MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));
MOV(64, R(RPPCSTATE), Imm64((u64)&ppc_state + 0x80));

MOV(64, PPCSTATE(stored_stack_pointer), R(RSP));
MOV(64, PPCSTATE(ppc_state, stored_stack_pointer), R(RSP));
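The +0x80 bias on RPPCSTATE is a displacement-size trick: a field at offset off is addressed as MDisp(RPPCSTATE, off - 0x80), so every offset in [0x00, 0x100) fits a signed 8-bit displacement instead of a 32-bit one, shrinking nearly every PPCSTATE access by three bytes. For example (offsets illustrative only): a field at offset 0x00 encodes as disp8 -0x80, and one at 0x04 as disp8 -0x7C.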

// something that can't pass the BLR test
MOV(64, MDisp(RSP, 8), Imm32((u32)-1));
@@ -60,7 +62,7 @@ void Jit64AsmRoutineManager::Generate()
ABI_PopRegistersAndAdjustStack({}, 0);
FixupBranch skipToRealDispatch = J(enable_debugging); // skip the sync and compare first time
dispatcher_mispredicted_blr = GetCodePtr();
AND(32, PPCSTATE(pc), Imm32(0xFFFFFFFC));
AND(32, PPCSTATE(ppc_state, pc), Imm32(0xFFFFFFFC));

#if 0 // debug mispredicts
MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
@@ -71,17 +73,17 @@ void Jit64AsmRoutineManager::Generate()

ResetStack(*this);

SUB(32, PPCSTATE(downcount), R(RSCRATCH2));
SUB(32, PPCSTATE(ppc_state, downcount), R(RSCRATCH2));

dispatcher = GetCodePtr();

// Expected result of SUB(32, PPCSTATE(downcount), Imm32(block_cycles)) is in RFLAGS.
// Expected result of SUB(32, PPCSTATE(ppc_state, downcount), Imm32(block_cycles)) is in RFLAGS.
// Branch if downcount is <= 0 (signed).
FixupBranch bail = J_CC(CC_LE, true);
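In C-like terms, the emitted prologue and the bail branch implement the outer timing loop. A control-flow sketch with stub types standing in for the Dolphin ones:

struct PpcState { int downcount; /* pc, msr, ... */ };
void Advance(PpcState& s);            // stub: CoreTiming runs events, refills downcount
void LookUpAndRunBlock(PpcState& s);  // stub: the fast lookup below, or JitTrampoline
void DispatcherLoop(PpcState& s)
{
  for (;;)
  {
    // Each block ends with SUB(downcount, block_cycles) and jumps back here,
    // with RFLAGS still set from that SUB; hence the direct J_CC(CC_LE).
    if (s.downcount <= 0)
      Advance(s);             // the 'bail' path
    else
      LookUpAndRunBlock(s);   // dispatcher_no_timing_check onward
  }
}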

dispatcher_no_timing_check = GetCodePtr();

auto& system = Core::System::GetInstance();
auto& system = m_jit.m_system;

FixupBranch dbg_exit;
if (enable_debugging)
@@ -103,7 +105,7 @@
{
// Fast block number lookup.
// ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2
MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, R(RSCRATCH), PPCSTATE(ppc_state, pc));
// Keep a copy for later.
MOV(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMap());
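The identity in the comment checks out for 64-bit pointers; a self-contained verification (the mask value is chosen only for the example):

#include <cstdint>
static_assert(sizeof(void*) == 8, "JitBlock* is 8 bytes on x86-64");
constexpr uint32_t mask = 0xFFFF;    // example; the real mask comes from the block cache
constexpr uint32_t pc = 0x80003154;  // any word-aligned guest PC
static_assert(uint64_t((pc >> 2) & mask) * 8 == uint64_t(pc & (mask << 2)) * 2,
              "((PC >> 2) & mask) * sizeof(JitBlock*) == (PC & (mask << 2)) * 2");

Folding the >> 2 into the mask saves a shift: the emitted code can AND with (mask << 2) and scale by 2 in the addressing mode.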
@@ -123,7 +125,7 @@
FixupBranch not_found = J_CC(CC_Z);

// Check both block.effectiveAddress and block.msrBits.
MOV(32, R(RSCRATCH2), PPCSTATE(msr));
MOV(32, R(RSCRATCH2), PPCSTATE(ppc_state, msr));
AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK));
SHL(64, R(RSCRATCH2), Imm8(32));
// RSCRATCH_EXTRA still has the PC.
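Packing the masked MSR bits above the PC turns two compares into one 64-bit compare. A sketch of the equivalent computation, assuming the block cache stores msrBits in the high dword next to effectiveAddress (consistent with the SHL by 32 here):

#include <cstdint>
constexpr uint64_t DispatchKey(uint32_t msr, uint32_t pc, uint32_t msr_mask)
{
  return (uint64_t(msr & msr_mask) << 32) | pc;  // mirrors the AND, SHL(32), OR with PC
}
// A block matches iff DispatchKey(msr, pc, JIT_CACHE_MSR_MASK)
//     == (uint64_t(block.msrBits) << 32) | block.effectiveAddress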
@@ -134,7 +136,7 @@

// Success; branch to the block we found.
// Switch to the correct memory base, in case MSR.DR has changed.
TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27)));
TEST(32, PPCSTATE(ppc_state, msr), Imm32(1 << (31 - 27)));
FixupBranch physmem = J_CC(CC_Z);
MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase()));
JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, normalEntry))));
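The TEST mask is PowerPC big-endian bit numbering at work: MSR.DR is architectural bit 27 counted from the most significant bit, so as an x86 immediate it is 1 << (31 - 27) = 0x10. With DR set, data address translation is on and the logical base is loaded; with DR clear, the fall-through keeps the physical base.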
@@ -158,7 +160,7 @@
FixupBranch no_block_available = J_CC(CC_Z);

// Switch to the correct memory base, in case MSR.DR has changed.
TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27)));
TEST(32, PPCSTATE(ppc_state, msr), Imm32(1 << (31 - 27)));
FixupBranch physmem = J_CC(CC_Z);
MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase()));
JMPptr(R(ABI_RETURN));
@@ -176,7 +178,7 @@

ABI_PushRegistersAndAdjustStack({}, 0);
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&m_jit)));
MOV(32, R(ABI_PARAM2), PPCSTATE(pc));
MOV(32, R(ABI_PARAM2), PPCSTATE(ppc_state, pc));
ABI_CallFunction(JitTrampoline);
ABI_PopRegistersAndAdjustStack({}, 0);

@@ -186,8 +188,8 @@
do_timing = GetCodePtr();

// make sure npc contains the next pc (needed for exception checking in CoreTiming::Advance)
MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH));
MOV(32, R(RSCRATCH), PPCSTATE(ppc_state, pc));
MOV(32, PPCSTATE(ppc_state, npc), R(RSCRATCH));

// Check the state pointer to see if we are exiting
// Gets checked at the end of every slice
@@ -214,7 +216,7 @@

void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter)
{
emitter.MOV(64, R(RSP), PPCSTATE(stored_stack_pointer));
emitter.MOV(64, R(RSP), PPCSTATE(m_jit.m_ppc_state, stored_stack_pointer));
}

void Jit64AsmRoutineManager::GenerateCommon()
4 changes: 0 additions & 4 deletions Source/Core/Core/PowerPC/Jit64/JitAsm.h
@@ -11,8 +11,6 @@ namespace Gen
class X64CodeBlock;
}

class JitBase;

// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
// code at runtime. In the case of fixed code like this, after writing it, we
// write-protect the memory, essentially making it work just like precompiled code.
@@ -43,6 +41,4 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
private:
void Generate();
void GenerateCommon();

JitBase& m_jit;
};
38 changes: 19 additions & 19 deletions Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
@@ -33,9 +33,9 @@ void Jit64::sc(UGeckoInstruction inst)

gpr.Flush();
fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4));
MOV(32, PPCSTATE(m_ppc_state, pc), Imm32(js.compilerPC + 4));
LOCK();
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL));
OR(32, PPCSTATE(m_ppc_state, Exceptions), Imm32(EXCEPTION_SYSCALL));
WriteExceptionExit();
}
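The LOCK prefix matters because Exceptions can also be set from outside the CPU thread (external interrupts, for example), so the read-modify-write must be atomic. A standalone sketch of the C++-side equivalent (the constant's value is assumed for illustration):

#include <atomic>
#include <cstdint>
std::atomic<uint32_t> exceptions{0};
constexpr uint32_t EXCEPTION_SYSCALL = 0x0400;  // assumed encoding
void RaiseSyscall()
{
  // Mirrors the emitted LOCK OR [ppc_state.Exceptions], EXCEPTION_SYSCALL.
  exceptions.fetch_or(EXCEPTION_SYSCALL, std::memory_order_relaxed);
}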

@@ -50,12 +50,12 @@ void Jit64::rfi(UGeckoInstruction inst)
const u32 mask = 0x87C0FFFF;
const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
// MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13;
AND(32, PPCSTATE(msr), Imm32((~mask) & clearMSR13));
MOV(32, R(RSCRATCH), PPCSTATE_SRR1);
AND(32, PPCSTATE(m_ppc_state, msr), Imm32((~mask) & clearMSR13));
MOV(32, R(RSCRATCH), PPCSTATE_SRR1(m_ppc_state));
AND(32, R(RSCRATCH), Imm32(mask & clearMSR13));
OR(32, PPCSTATE(msr), R(RSCRATCH));
OR(32, PPCSTATE(m_ppc_state, msr), R(RSCRATCH));
// NPC = SRR0;
MOV(32, R(RSCRATCH), PPCSTATE_SRR0);
MOV(32, R(RSCRATCH), PPCSTATE_SRR0(m_ppc_state));
WriteRfiExitDestInRSCRATCH();
}
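The masked MSR update in the comment is easy to sanity-check as plain C++; this mirrors the emitted AND/AND/OR sequence exactly:

#include <cstdint>
constexpr uint32_t NewMsrAfterRfi(uint32_t msr, uint32_t srr1)
{
  constexpr uint32_t mask = 0x87C0FFFF;
  constexpr uint32_t clearMSR13 = 0xFFFBFFFF;  // clears MSR[13]
  return ((msr & ~mask) | (srr1 & mask)) & clearMSR13;
}

The JIT distributes clearMSR13 into both operands, computing (msr & ~mask & clearMSR13) | (srr1 & mask & clearMSR13), which is why each AND immediate above already folds it in.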

@@ -67,7 +67,7 @@ void Jit64::bx(UGeckoInstruction inst)
// We must always process the following statement
// even if the blocks are merged by PPCAnalyst::Flatten().
if (inst.LK)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
MOV(32, PPCSTATE_LR(m_ppc_state), Imm32(js.compilerPC + 4));

// If this is not the last instruction of a block,
// we will skip the rest of the processing.
@@ -89,7 +89,7 @@

#ifdef ACID_TEST
if (inst.LK)
AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
AND(32, PPCSTATE(m_ppc_state, cr), Imm32(~(0xFF000000)));
#endif
if (js.op->branchIsIdleLoop)
{
@@ -114,7 +114,7 @@ void Jit64::bcx(UGeckoInstruction inst)
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
SUB(32, PPCSTATE_CTR, Imm8(1));
SUB(32, PPCSTATE_CTR(m_ppc_state), Imm8(1));
if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = J_CC(CC_NZ, true);
else
@@ -129,7 +129,7 @@
}
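The CTR test follows the PowerPC BO field semantics: one BO bit skips the decrement entirely; otherwise whether the branch is allowed depends on CTR reaching zero. A standalone sketch (the constant values are assumed, matching conventional definitions):

#include <cstdint>
constexpr uint32_t BO_BRANCH_IF_CTR_0 = 2;     // assumed encodings
constexpr uint32_t BO_DONT_DECREMENT_FLAG = 4;
bool CtrConditionOk(uint32_t bo, uint32_t& ctr)
{
  if ((bo & BO_DONT_DECREMENT_FLAG) == 0)
  {
    --ctr;  // the SUB(32, PPCSTATE_CTR(...), Imm8(1)) above
    const bool ctr_zero = (ctr == 0);
    // The emitted J_CC(CC_NZ) / J_CC(CC_Z) jump away when this returns false.
    return (bo & BO_BRANCH_IF_CTR_0) ? ctr_zero : !ctr_zero;
  }
  return true;  // BO says: don't touch CTR, condition is satisfied
}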

if (inst.LK)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
MOV(32, PPCSTATE_LR(m_ppc_state), Imm32(js.compilerPC + 4));

// If this is not the last instruction of a block
// and an unconditional branch, we will skip the rest of the processing.
@@ -193,9 +193,9 @@ void Jit64::bcctrx(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();

MOV(32, R(RSCRATCH), PPCSTATE_CTR);
MOV(32, R(RSCRATCH), PPCSTATE_CTR(m_ppc_state));
if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
MOV(32, PPCSTATE_LR(m_ppc_state), Imm32(js.compilerPC + 4)); // LR = PC + 4;
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
}
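The AND with 0xFFFFFFFC mirrors the architecture: bcctr branches to CTR with the low two bits forced to zero, since PowerPC instructions are always 4-byte aligned, so masking guarantees the dispatcher never sees a misaligned target. The same mask appears for LR in bclrx below when the BLR optimization is disabled.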
@@ -208,11 +208,11 @@

FixupBranch b =
JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE));
MOV(32, R(RSCRATCH), PPCSTATE_CTR);
MOV(32, R(RSCRATCH), PPCSTATE_CTR(m_ppc_state));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
// MOV(32, PPCSTATE(pc), R(RSCRATCH)); => Already done in WriteExitDestInRSCRATCH()
// MOV(32, PPCSTATE(m_ppc_state, pc), R(RSCRATCH)); => Already done in WriteExitDestInRSCRATCH()
if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
MOV(32, PPCSTATE_LR(m_ppc_state), Imm32(js.compilerPC + 4)); // LR = PC + 4;

{
RCForkGuard gpr_guard = gpr.Fork();
@@ -241,7 +241,7 @@ void Jit64::bclrx(UGeckoInstruction inst)
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
SUB(32, PPCSTATE_CTR, Imm8(1));
SUB(32, PPCSTATE_CTR(m_ppc_state), Imm8(1));
if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = J_CC(CC_NZ, true);
else
@@ -258,10 +258,10 @@
// The line below can be used to prove that blr "eats flags" in practice.
// This observation could let us do some useful optimizations.
#ifdef ACID_TEST
AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
AND(32, PPCSTATE(m_ppc_state, cr), Imm32(~(0xFF000000)));
#endif

MOV(32, R(RSCRATCH), PPCSTATE_LR);
MOV(32, R(RSCRATCH), PPCSTATE_LR(m_ppc_state));
// We don't have to do this because WriteBLRExit handles it for us. Specifically, since we only
// ever push
// divisible-by-four instruction addresses onto the stack, if the return address matches, we're
@@ -270,7 +270,7 @@
if (!m_enable_blr_optimization)
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (inst.LK)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
MOV(32, PPCSTATE_LR(m_ppc_state), Imm32(js.compilerPC + 4));

{
RCForkGuard gpr_guard = gpr.Fork();
12 changes: 6 additions & 6 deletions Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -710,7 +710,7 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
RegCache::Realize(Ra, Rb);

if (fprf)
AND(32, PPCSTATE(fpscr), Imm32(~FPCC_MASK));
AND(32, PPCSTATE(m_ppc_state, fpscr), Imm32(~FPCC_MASK));

if (upper)
{
@@ -745,15 +745,15 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
MOV(64, R(RSCRATCH),
Imm64(PowerPC::ConditionRegister::PPCToInternal(output[PowerPC::CR_EQ_BIT])));
if (fprf)
OR(32, PPCSTATE(fpscr), Imm32(PowerPC::CR_EQ << FPRF_SHIFT));
OR(32, PPCSTATE(m_ppc_state, fpscr), Imm32(PowerPC::CR_EQ << FPRF_SHIFT));

continue1 = J();

SetJumpTarget(pNaN);
MOV(64, R(RSCRATCH),
Imm64(PowerPC::ConditionRegister::PPCToInternal(output[PowerPC::CR_SO_BIT])));
if (fprf)
OR(32, PPCSTATE(fpscr), Imm32(PowerPC::CR_SO << FPRF_SHIFT));
OR(32, PPCSTATE(m_ppc_state, fpscr), Imm32(PowerPC::CR_SO << FPRF_SHIFT));

if (a != b)
{
@@ -763,14 +763,14 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
MOV(64, R(RSCRATCH),
Imm64(PowerPC::ConditionRegister::PPCToInternal(output[PowerPC::CR_GT_BIT])));
if (fprf)
OR(32, PPCSTATE(fpscr), Imm32(PowerPC::CR_GT << FPRF_SHIFT));
OR(32, PPCSTATE(m_ppc_state, fpscr), Imm32(PowerPC::CR_GT << FPRF_SHIFT));
continue3 = J();

SetJumpTarget(pLesser);
MOV(64, R(RSCRATCH),
Imm64(PowerPC::ConditionRegister::PPCToInternal(output[PowerPC::CR_LT_BIT])));
if (fprf)
OR(32, PPCSTATE(fpscr), Imm32(PowerPC::CR_LT << FPRF_SHIFT));
OR(32, PPCSTATE(m_ppc_state, fpscr), Imm32(PowerPC::CR_LT << FPRF_SHIFT));
}

SetJumpTarget(continue1);
@@ -780,7 +780,7 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
SetJumpTarget(continue3);
}

MOV(64, PPCSTATE(cr.fields[crf]), R(RSCRATCH));
MOV(64, PPCSTATE(m_ppc_state, cr.fields[crf]), R(RSCRATCH));
}
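FloatCompare sets exactly one of LT/GT/EQ/SO in the target CR field and, when fprf is requested, in FPSCR's FPCC, with SO doubling as "unordered" for NaN operands. The mapping as a standalone sketch (bit values assumed to match PowerPC's CR encoding):

#include <cmath>
#include <cstdint>
enum : uint32_t { CR_SO = 1, CR_EQ = 2, CR_GT = 4, CR_LT = 8 };  // assumed encoding
uint32_t Fpcc(double a, double b)
{
  if (std::isnan(a) || std::isnan(b))
    return CR_SO;  // unordered: the pNaN path above
  if (a < b)
    return CR_LT;  // pLesser
  if (a > b)
    return CR_GT;  // pGreater
  return CR_EQ;
}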

void Jit64::fcmpX(UGeckoInstruction inst)
