Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JitCache: Support for VMEM + MSR bits #4095

Merged
merged 5 commits into from
Aug 6, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 49 additions & 37 deletions Source/Core/Core/PowerPC/CachedInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ void CachedInterpreter::Init()
jo.enableBlocklink = false;

JitBaseBlockCache::Init();
UpdateMemoryOptions();

code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa;
Expand All @@ -41,34 +42,29 @@ void CachedInterpreter::Run()

void CachedInterpreter::SingleStep()
{
int block = GetBlockNumberFromStartAddress(PC);
if (block >= 0)
{
Instruction* code = (Instruction*)GetCompiledCodeFromBlock(block);
const u8* normalEntry = jit->GetBlockCache()->Dispatch();
const Instruction* code = reinterpret_cast<const Instruction*>(normalEntry);

while (true)
while (true)
{
switch (code->type)
{
switch (code->type)
{
case Instruction::INSTRUCTION_ABORT:
case Instruction::INSTRUCTION_ABORT:
return;

case Instruction::INSTRUCTION_TYPE_COMMON:
code->common_callback(UGeckoInstruction(code->data));
code++;
break;

case Instruction::INSTRUCTION_TYPE_CONDITIONAL:
bool ret = code->conditional_callback(code->data);
code++;
if (ret)
return;

case Instruction::INSTRUCTION_TYPE_COMMON:
code->common_callback(UGeckoInstruction(code->data));
code++;
break;

case Instruction::INSTRUCTION_TYPE_CONDITIONAL:
bool ret = code->conditional_callback(code->data);
code++;
if (ret)
return;
break;
}
break;
}
}

Jit(PC);
}

static void EndBlock(UGeckoInstruction data)
Expand All @@ -87,14 +83,30 @@ static void WritePC(UGeckoInstruction data)
NPC = data.hex + 4;
}

// Callback appended by Jit() when code_block.m_broken is set: stores the
// address carried in the instruction word directly into NPC.
static void WriteBrokenBlockNPC(UGeckoInstruction data)
{
  const auto resume_address = data.hex;
  NPC = resume_address;
}

// Conditional callback emitted ahead of a block's first FPU-using instruction.
// If MSR.FP is clear, flags EXCEPTION_FPU_UNAVAILABLE, lets
// PowerPC::CheckExceptions() process it and returns true; a true result makes
// CachedInterpreter::SingleStep() stop running the current block.
// Returns false, leaving all state untouched, when MSR.FP is set.
//
// NOTE(review): `data` is used below both as an address (PC = NPC = data) and
// as a cycle count (downcount -= data), and Jit() is shown emitting CheckFPU
// with ops[i].address as well as with js.downcountAmount. This looks like the
// before/after lines of a diff shown together - confirm which of the two
// statements belongs in the final function.
static bool CheckFPU(u32 data)
{
UReg_MSR& msr = (UReg_MSR&)MSR;
if (!msr.FP)
{
PC = NPC = data;
PowerPC::ppcState.Exceptions |= EXCEPTION_FPU_UNAVAILABLE;
PowerPC::CheckExceptions();
PowerPC::ppcState.downcount -= data;
return true;
}
return false;
}

static bool CheckDSI(u32 data)
{
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
PowerPC::CheckExceptions();
PowerPC::ppcState.downcount -= data;
return true;
}
return false;
Expand Down Expand Up @@ -161,22 +173,29 @@ void CachedInterpreter::Jit(u32 address)

if (!ops[i].skip)
{
if ((ops[i].opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
bool check_fpu = (ops[i].opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound;
bool endblock = (ops[i].opinfo->flags & FL_ENDBLOCK) != 0;
bool memcheck = (ops[i].opinfo->flags & FL_LOADSTORE) && jo.memcheck;

if (check_fpu)
{
m_code.emplace_back(CheckFPU, ops[i].address);
m_code.emplace_back(WritePC, ops[i].address);
m_code.emplace_back(CheckFPU, js.downcountAmount);
js.firstFPInstructionFound = true;
}

if (ops[i].opinfo->flags & FL_ENDBLOCK)
if (endblock || memcheck)
m_code.emplace_back(WritePC, ops[i].address);
m_code.emplace_back(GetInterpreterOp(ops[i].inst), ops[i].inst);
if (ops[i].opinfo->flags & FL_ENDBLOCK)
if (memcheck)
m_code.emplace_back(CheckDSI, js.downcountAmount);
if (endblock)
m_code.emplace_back(EndBlock, js.downcountAmount);
}
}
if (code_block.m_broken)
{
m_code.emplace_back(WritePC, nextPC);
m_code.emplace_back(WriteBrokenBlockNPC, nextPC);
m_code.emplace_back(EndBlock, js.downcountAmount);
}
m_code.emplace_back();
Expand All @@ -191,12 +210,5 @@ void CachedInterpreter::ClearCache()
{
m_code.clear();
JitBaseBlockCache::Clear();
}

void CachedInterpreter::WriteDestroyBlock(const u8* location, u32 address)
{
}

void CachedInterpreter::WriteLinkBlock(u8* location, const JitBlock& block)
{
UpdateMemoryOptions();
}
7 changes: 2 additions & 5 deletions Source/Core/Core/PowerPC/CachedInterpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,8 @@ class CachedInterpreter : public JitBase, JitBaseBlockCache

JitBaseBlockCache* GetBlockCache() override { return this; }
const char* GetName() override { return "Cached Interpreter"; }
void WriteLinkBlock(u8* location, const JitBlock& block) override;

void WriteDestroyBlock(const u8* location, u32 address) override;

const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; };
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override {}
const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; }
private:
struct Instruction
{
Expand Down
27 changes: 5 additions & 22 deletions Source/Core/Core/PowerPC/Jit64/Jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,29 +396,12 @@ void Jit64::JustWriteExit(u32 destination, bool bl, u32 after)
linkData.exitAddress = destination;
linkData.linkStatus = false;

// Link opportunity!
int block;
if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
{
// It exists! Joy of joy!
JitBlock* jb = blocks.GetBlock(block);
const u8* addr = jb->checkedEntry;
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(addr);
else
JMP(addr, true);
linkData.linkStatus = true;
}
MOV(32, PPCSTATE(pc), Imm32(destination));
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(asm_routines.dispatcher);
else
{
MOV(32, PPCSTATE(pc), Imm32(destination));
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(asm_routines.dispatcher);
else
JMP(asm_routines.dispatcher, true);
}
JMP(asm_routines.dispatcher, true);

b->linkData.push_back(linkData);

Expand Down
125 changes: 41 additions & 84 deletions Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ void Jit64AsmRoutineManager::Generate()
AND(32, PPCSTATE(pc), Imm32(0xFFFFFFFC));

#if 0 // debug mispredicts
MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0);
CALL(reinterpret_cast<void *>(&ReportMispredict));
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0);
MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0);
CALL(reinterpret_cast<void *>(&ReportMispredict));
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0);
#endif

ResetStack();
ResetStack(*this);

SUB(32, PPCSTATE(downcount), R(RSCRATCH2));

Expand Down Expand Up @@ -102,105 +102,62 @@ void Jit64AsmRoutineManager::Generate()
MOV(64, R(RMEM), Imm64((u64)Memory::logical_base));
SetJumpTarget(membaseend);

MOV(32, R(RSCRATCH), PPCSTATE(pc));

// TODO: We need to handle code which executes the same PC with
// different values of MSR.IR. It probably makes sense to handle
// MSR.DR here too, to allow IsOptimizableRAMAddress-based
// optimizations safe, because IR and DR are usually set/cleared together.
// TODO: Branching based on the 20 most significant bits of instruction
// addresses without translating them is wrong.
u64 icache = (u64)jit->GetBlockCache()->iCache.data();
u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data();
u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data();
u32 mask = 0;
FixupBranch no_mem;
FixupBranch exit_mem;
FixupBranch exit_vmem;
if (SConfig::GetInstance().bWii)
mask = JIT_ICACHE_EXRAM_BIT;
mask |= JIT_ICACHE_VMEM_BIT;
TEST(32, R(RSCRATCH), Imm32(mask));
no_mem = J_CC(CC_NZ);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
// The following is a translation of JitBaseBlockCache::Dispatch into assembly.

// Fast block number lookup.
MOV(32, R(RSCRATCH), PPCSTATE(pc));
u64 icache = reinterpret_cast<u64>(jit->GetBlockCache()->GetICache());
AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2));
if (icache <= INT_MAX)
{
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache));
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, static_cast<s32>(icache)));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(icache));
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
}

exit_mem = J();
SetJumpTarget(no_mem);
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
FixupBranch no_vmem = J_CC(CC_Z);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
if (icacheVmem <= INT_MAX)
// Check whether the block we found matches the current state.
u64 blocks = reinterpret_cast<u64>(jit->GetBlockCache()->GetBlocks());
IMUL(32, RSCRATCH, R(RSCRATCH), Imm32(sizeof(JitBlock)));
if (blocks <= INT_MAX)
{
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem));
ADD(64, R(RSCRATCH), Imm32(static_cast<s32>(blocks)));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(icacheVmem));
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
}

if (SConfig::GetInstance().bWii)
exit_vmem = J();
SetJumpTarget(no_vmem);
if (SConfig::GetInstance().bWii)
{
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));

if (icacheEx <= INT_MAX)
{
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(icacheEx));
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
}

SetJumpTarget(no_exram);
}
SetJumpTarget(exit_mem);
if (SConfig::GetInstance().bWii)
SetJumpTarget(exit_vmem);

TEST(32, R(RSCRATCH), R(RSCRATCH));
FixupBranch notfound = J_CC(CC_L);
// grab from list and jump to it
u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers();
if (codePointers <= INT_MAX)
{
JMPptr(MScaled(RSCRATCH, SCALE_8, (s32)codePointers));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(codePointers));
JMPptr(MComplex(RSCRATCH2, RSCRATCH, SCALE_8, 0));
MOV(64, R(RSCRATCH2), Imm64(blocks));
ADD(64, R(RSCRATCH), R(RSCRATCH2));
}
// Check both block.effectiveAddress and block.msrBits.
MOV(32, R(RSCRATCH2), PPCSTATE(msr));
AND(32, R(RSCRATCH2), Imm32(JitBlock::JIT_CACHE_MSR_MASK));
SHL(64, R(RSCRATCH2), Imm8(32));
MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, effectiveAddress))));
FixupBranch notfound = J_CC(CC_NE);
// Success; branch to the block we found.
JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, normalEntry))));
SetJumpTarget(notfound);

// Failure; call into the block cache to update the state, then try again.
// (We need to loop because Jit() might not actually generate a block
// if we hit an ISI.)

// We reset the stack because Jit might clear the code cache.
// Also, if we are in the middle of disabling BLR optimization on Windows,
// we need to reset the stack before _resetstkoflw() is called in Jit;
// otherwise we will generate a second stack overflow exception during DoJit().
ResetStack();
ResetStack(*this);

// Ok, no block, let's jit
// Ok, no block, let's call the slow dispatcher
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionA(32, (void*)&Jit, PPCSTATE(pc));
ABI_CallFunction(reinterpret_cast<void*>(&JitBase::Dispatch));
ABI_PopRegistersAndAdjustStack({}, 0);

JMP(dispatcherNoCheck, true); // no point in special casing this
// JMPptr(R(ABI_RETURN));
JMP(dispatcherNoCheck, true);

SetJumpTarget(bail);
doTiming = GetCodePtr();
Expand All @@ -217,7 +174,7 @@ void Jit64AsmRoutineManager::Generate()
// Landing pad for drec space
if (SConfig::GetInstance().bEnableDebugging)
SetJumpTarget(dbg_exit);
ResetStack();
ResetStack(*this);
if (m_stack_top)
{
ADD(64, R(RSP), Imm8(0x18));
Expand All @@ -232,12 +189,12 @@ void Jit64AsmRoutineManager::Generate()
GenerateCommon();
}

void Jit64AsmRoutineManager::ResetStack()
void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter)
{
if (m_stack_top)
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
emitter.MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
else
MOV(64, R(RSP), M(&s_saved_rsp));
emitter.MOV(64, R(RSP), M(&s_saved_rsp));
}

void Jit64AsmRoutineManager::GenerateCommon()
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/Core/PowerPC/Jit64/JitAsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
{
private:
void Generate();
void ResetStack();
void GenerateCommon();
u8* m_stack_top;

Expand All @@ -41,4 +40,5 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
}

void Shutdown() { FreeCodeSpace(); }
void ResetStack(X64CodeBlock& emitter);
};
1 change: 1 addition & 0 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
ABI_CallFunction((void*)JitInterface::InvalidateICache);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
asm_routines.ResetStack(*this);
c = J(true);
SwitchToNearCode();
SetJumpTarget(c);
Expand Down
Loading