Skip to content

Commit

Permalink
Merge pull request #4004 from degasus/dynamic-bat
Browse files Browse the repository at this point in the history
JitCache: Support for VMEM + MSR bits
  • Loading branch information
delroth committed Jul 16, 2016
2 parents 30de1ec + f9e5660 commit bb87bb7
Show file tree
Hide file tree
Showing 19 changed files with 448 additions and 370 deletions.
86 changes: 49 additions & 37 deletions Source/Core/Core/PowerPC/CachedInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ void CachedInterpreter::Init()
jo.enableBlocklink = false;

JitBaseBlockCache::Init();
UpdateMemoryOptions();

code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa;
Expand All @@ -41,34 +42,29 @@ void CachedInterpreter::Run()

// Executes cached-interpreter instructions for the block at the current PC.
//
// The loop walks an array of Instruction records through `code` and dispatches
// on `code->type`:
//   - INSTRUCTION_ABORT:            stop and return to the caller.
//   - INSTRUCTION_TYPE_COMMON:      call common_callback with the record's data
//                                   (wrapped in UGeckoInstruction) and advance.
//   - INSTRUCTION_TYPE_CONDITIONAL: call conditional_callback with the record's
//                                   data, advance, and return if it yields true.
//
// NOTE(review): this listing appears to interleave the removed and the added
// side of the diff (two different ways of obtaining `code`, duplicated
// while/switch/case lines, and a trailing Jit(PC) call). Confirm which lines
// belong to the current revision before changing any logic here.
void CachedInterpreter::SingleStep()
{
// Lookup variant 1: find the block number for PC and fetch its compiled code.
int block = GetBlockNumberFromStartAddress(PC);
if (block >= 0)
{
Instruction* code = (Instruction*)GetCompiledCodeFromBlock(block);
// Lookup variant 2: ask the block cache's Dispatch() for the entry pointer and
// reinterpret it as the start of the Instruction array.
const u8* normalEntry = jit->GetBlockCache()->Dispatch();
const Instruction* code = reinterpret_cast<const Instruction*>(normalEntry);

while (true)
while (true)
{
switch (code->type)
{
switch (code->type)
{
case Instruction::INSTRUCTION_ABORT:
case Instruction::INSTRUCTION_ABORT:
return;

case Instruction::INSTRUCTION_TYPE_COMMON:
code->common_callback(UGeckoInstruction(code->data));
code++;
break;

case Instruction::INSTRUCTION_TYPE_CONDITIONAL:
bool ret = code->conditional_callback(code->data);
code++;
// A true result from the conditional callback ends execution of this block.
if (ret)
return;

case Instruction::INSTRUCTION_TYPE_COMMON:
code->common_callback(UGeckoInstruction(code->data));
code++;
break;

case Instruction::INSTRUCTION_TYPE_CONDITIONAL:
bool ret = code->conditional_callback(code->data);
code++;
if (ret)
return;
break;
}
break;
}
}

Jit(PC);
}

static void EndBlock(UGeckoInstruction data)
Expand All @@ -87,14 +83,30 @@ static void WritePC(UGeckoInstruction data)
NPC = data.hex + 4;
}

// Cached-interpreter callback that stores the value carried in `data`
// (data.hex) into NPC unchanged.
// In this listing it is emitted with `nextPC` when code_block.m_broken is set,
// immediately before the EndBlock callback.
static void WriteBrokenBlockNPC(UGeckoInstruction data)
{
NPC = data.hex;
}

// Conditional callback guarding floating-point instructions.
//
// Returns true when MSR.FP is clear, after flagging EXCEPTION_FPU_UNAVAILABLE,
// calling PowerPC::CheckExceptions() and subtracting `data` from the downcount.
// Returns false, with no side effects, when MSR.FP is set.
//
// NOTE(review): `data` is used below both as an address (PC = NPC = data) and
// as a cycle count (downcount -= data). The call sites in this listing pass
// ops[i].address in one line and js.downcountAmount in another, so these two
// statements presumably belong to different revisions of the diff - confirm.
static bool CheckFPU(u32 data)
{
// View the raw MSR value through UReg_MSR to read its FP bit.
UReg_MSR& msr = (UReg_MSR&)MSR;
if (!msr.FP)
{
PC = NPC = data;
PowerPC::ppcState.Exceptions |= EXCEPTION_FPU_UNAVAILABLE;
PowerPC::CheckExceptions();
PowerPC::ppcState.downcount -= data;
return true;
}
return false;
}

static bool CheckDSI(u32 data)
{
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
PowerPC::CheckExceptions();
PowerPC::ppcState.downcount -= data;
return true;
}
return false;
Expand Down Expand Up @@ -161,22 +173,29 @@ void CachedInterpreter::Jit(u32 address)

if (!ops[i].skip)
{
if ((ops[i].opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
bool check_fpu = (ops[i].opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound;
bool endblock = (ops[i].opinfo->flags & FL_ENDBLOCK) != 0;
bool memcheck = (ops[i].opinfo->flags & FL_LOADSTORE) && jo.memcheck;

if (check_fpu)
{
m_code.emplace_back(CheckFPU, ops[i].address);
m_code.emplace_back(WritePC, ops[i].address);
m_code.emplace_back(CheckFPU, js.downcountAmount);
js.firstFPInstructionFound = true;
}

if (ops[i].opinfo->flags & FL_ENDBLOCK)
if (endblock || memcheck)
m_code.emplace_back(WritePC, ops[i].address);
m_code.emplace_back(GetInterpreterOp(ops[i].inst), ops[i].inst);
if (ops[i].opinfo->flags & FL_ENDBLOCK)
if (memcheck)
m_code.emplace_back(CheckDSI, js.downcountAmount);
if (endblock)
m_code.emplace_back(EndBlock, js.downcountAmount);
}
}
if (code_block.m_broken)
{
m_code.emplace_back(WritePC, nextPC);
m_code.emplace_back(WriteBrokenBlockNPC, nextPC);
m_code.emplace_back(EndBlock, js.downcountAmount);
}
m_code.emplace_back();
Expand All @@ -191,12 +210,5 @@ void CachedInterpreter::ClearCache()
{
m_code.clear();
JitBaseBlockCache::Clear();
}

void CachedInterpreter::WriteDestroyBlock(const u8* location, u32 address)
{
}

void CachedInterpreter::WriteLinkBlock(u8* location, const JitBlock& block)
{
UpdateMemoryOptions();
}
7 changes: 2 additions & 5 deletions Source/Core/Core/PowerPC/CachedInterpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,8 @@ class CachedInterpreter : public JitBase, JitBaseBlockCache

JitBaseBlockCache* GetBlockCache() override { return this; }
const char* GetName() override { return "Cached Interpreter"; }
void WriteLinkBlock(u8* location, const JitBlock& block) override;

void WriteDestroyBlock(const u8* location, u32 address) override;

const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; };
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override {}
const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; }
private:
struct Instruction
{
Expand Down
27 changes: 5 additions & 22 deletions Source/Core/Core/PowerPC/Jit64/Jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,29 +396,12 @@ void Jit64::JustWriteExit(u32 destination, bool bl, u32 after)
linkData.exitAddress = destination;
linkData.linkStatus = false;

// Link opportunity!
int block;
if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
{
// It exists! Joy of joy!
JitBlock* jb = blocks.GetBlock(block);
const u8* addr = jb->checkedEntry;
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(addr);
else
JMP(addr, true);
linkData.linkStatus = true;
}
MOV(32, PPCSTATE(pc), Imm32(destination));
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(asm_routines.dispatcher);
else
{
MOV(32, PPCSTATE(pc), Imm32(destination));
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(asm_routines.dispatcher);
else
JMP(asm_routines.dispatcher, true);
}
JMP(asm_routines.dispatcher, true);

b->linkData.push_back(linkData);

Expand Down
125 changes: 41 additions & 84 deletions Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ void Jit64AsmRoutineManager::Generate()
AND(32, PPCSTATE(pc), Imm32(0xFFFFFFFC));

#if 0 // debug mispredicts
MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0);
CALL(reinterpret_cast<void *>(&ReportMispredict));
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0);
MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0);
CALL(reinterpret_cast<void *>(&ReportMispredict));
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0);
#endif

ResetStack();
ResetStack(*this);

SUB(32, PPCSTATE(downcount), R(RSCRATCH2));

Expand Down Expand Up @@ -102,105 +102,62 @@ void Jit64AsmRoutineManager::Generate()
MOV(64, R(RMEM), Imm64((u64)Memory::logical_base));
SetJumpTarget(membaseend);

MOV(32, R(RSCRATCH), PPCSTATE(pc));

// TODO: We need to handle code which executes the same PC with
// different values of MSR.IR. It probably makes sense to handle
// MSR.DR here too, to allow IsOptimizableRAMAddress-based
// optimizations safe, because IR and DR are usually set/cleared together.
// TODO: Branching based on the 20 most significant bits of instruction
// addresses without translating them is wrong.
u64 icache = (u64)jit->GetBlockCache()->iCache.data();
u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data();
u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data();
u32 mask = 0;
FixupBranch no_mem;
FixupBranch exit_mem;
FixupBranch exit_vmem;
if (SConfig::GetInstance().bWii)
mask = JIT_ICACHE_EXRAM_BIT;
mask |= JIT_ICACHE_VMEM_BIT;
TEST(32, R(RSCRATCH), Imm32(mask));
no_mem = J_CC(CC_NZ);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
// The following is a translation of JitBaseBlockCache::Dispatch into assembly.

// Fast block number lookup.
MOV(32, R(RSCRATCH), PPCSTATE(pc));
u64 icache = reinterpret_cast<u64>(jit->GetBlockCache()->GetICache());
AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2));
if (icache <= INT_MAX)
{
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache));
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, static_cast<s32>(icache)));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(icache));
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
}

exit_mem = J();
SetJumpTarget(no_mem);
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
FixupBranch no_vmem = J_CC(CC_Z);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
if (icacheVmem <= INT_MAX)
// Check whether the block we found matches the current state.
u64 blocks = reinterpret_cast<u64>(jit->GetBlockCache()->GetBlocks());
IMUL(32, RSCRATCH, R(RSCRATCH), Imm32(sizeof(JitBlock)));
if (blocks <= INT_MAX)
{
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem));
ADD(64, R(RSCRATCH), Imm32(static_cast<s32>(blocks)));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(icacheVmem));
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
}

if (SConfig::GetInstance().bWii)
exit_vmem = J();
SetJumpTarget(no_vmem);
if (SConfig::GetInstance().bWii)
{
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));

if (icacheEx <= INT_MAX)
{
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(icacheEx));
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
}

SetJumpTarget(no_exram);
}
SetJumpTarget(exit_mem);
if (SConfig::GetInstance().bWii)
SetJumpTarget(exit_vmem);

TEST(32, R(RSCRATCH), R(RSCRATCH));
FixupBranch notfound = J_CC(CC_L);
// grab from list and jump to it
u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers();
if (codePointers <= INT_MAX)
{
JMPptr(MScaled(RSCRATCH, SCALE_8, (s32)codePointers));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(codePointers));
JMPptr(MComplex(RSCRATCH2, RSCRATCH, SCALE_8, 0));
MOV(64, R(RSCRATCH2), Imm64(blocks));
ADD(64, R(RSCRATCH), R(RSCRATCH2));
}
// Check both block.effectiveAddress and block.msrBits.
MOV(32, R(RSCRATCH2), PPCSTATE(msr));
AND(32, R(RSCRATCH2), Imm32(JitBlock::JIT_CACHE_MSR_MASK));
SHL(64, R(RSCRATCH2), Imm8(32));
MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, effectiveAddress))));
FixupBranch notfound = J_CC(CC_NE);
// Success; branch to the block we found.
JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, normalEntry))));
SetJumpTarget(notfound);

// Failure; call into the block cache to update the state, then try again.
// (We need to loop because Jit() might not actually generate a block
// if we hit an ISI.)

// We reset the stack because Jit might clear the code cache.
// Also, if we are in the middle of disabling BLR optimization on Windows,
// we need to reset the stack before _resetstkoflw() is called in Jit;
// otherwise we will generate a second stack overflow exception during DoJit().
ResetStack();
ResetStack(*this);

// Ok, no block, let's jit
// Ok, no block, let's call the slow dispatcher
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionA(32, (void*)&Jit, PPCSTATE(pc));
ABI_CallFunction(reinterpret_cast<void*>(&JitBase::Dispatch));
ABI_PopRegistersAndAdjustStack({}, 0);

JMP(dispatcherNoCheck, true); // no point in special casing this
// JMPptr(R(ABI_RETURN));
JMP(dispatcherNoCheck, true);

SetJumpTarget(bail);
doTiming = GetCodePtr();
Expand All @@ -217,7 +174,7 @@ void Jit64AsmRoutineManager::Generate()
// Landing pad for drec space
if (SConfig::GetInstance().bEnableDebugging)
SetJumpTarget(dbg_exit);
ResetStack();
ResetStack(*this);
if (m_stack_top)
{
ADD(64, R(RSP), Imm8(0x18));
Expand All @@ -232,12 +189,12 @@ void Jit64AsmRoutineManager::Generate()
GenerateCommon();
}

// Emits a 64-bit MOV that reloads RSP:
//   - with the immediate (m_stack_top - 0x20) when m_stack_top is non-null;
//   - otherwise from the memory location s_saved_rsp.
//
// NOTE(review): the signature and each MOV appear twice in this listing - once
// emitting through this object and once through the passed-in `emitter`. This
// looks like both sides of the diff shown together; confirm which form is
// current before editing.
void Jit64AsmRoutineManager::ResetStack()
void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter)
{
if (m_stack_top)
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
emitter.MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
else
MOV(64, R(RSP), M(&s_saved_rsp));
emitter.MOV(64, R(RSP), M(&s_saved_rsp));
}

void Jit64AsmRoutineManager::GenerateCommon()
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/Core/PowerPC/Jit64/JitAsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
{
private:
void Generate();
void ResetStack();
void GenerateCommon();
u8* m_stack_top;

Expand All @@ -41,4 +40,5 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
}

void Shutdown() { FreeCodeSpace(); }
void ResetStack(X64CodeBlock& emitter);
};
1 change: 1 addition & 0 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
ABI_CallFunction((void*)JitInterface::InvalidateICache);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
asm_routines.ResetStack(*this);
c = J(true);
SwitchToNearCode();
SetJumpTarget(c);
Expand Down

0 comments on commit bb87bb7

Please sign in to comment.