New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Jit: Improve block lookup performance through a shm memory segment. #11737
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,10 @@ | |
|
||
using namespace Gen; | ||
|
||
// These need to be next to each other so that the assembly | ||
// code can compare them easily. | ||
static_assert(offsetof(JitBlockData, effectiveAddress) + 4 == offsetof(JitBlockData, msrBits)); | ||
|
||
Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit) | ||
{ | ||
} | ||
|
@@ -103,35 +107,58 @@ void Jit64AsmRoutineManager::Generate() | |
const bool assembly_dispatcher = true; | ||
if (assembly_dispatcher) | ||
{ | ||
// Fast block number lookup. | ||
// ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2 | ||
MOV(32, R(RSCRATCH), PPCSTATE(pc)); | ||
// Keep a copy for later. | ||
MOV(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); | ||
u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMap()); | ||
AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 2)); | ||
if (icache <= INT_MAX) | ||
if (m_jit.GetBlockCache()->GetFastBlockMap()) | ||
{ | ||
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast<s32>(icache))); | ||
u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMap()); | ||
MOV(32, R(RSCRATCH), PPCSTATE(pc)); | ||
|
||
MOV(64, R(RSCRATCH2), Imm64(icache)); | ||
// Each 4-byte offset of the PC register corresponds to an 8-byte offset | ||
// in the lookup table due to host pointers being 8-bytes long. | ||
MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0)); | ||
} | ||
else | ||
{ | ||
MOV(64, R(RSCRATCH2), Imm64(icache)); | ||
MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0)); | ||
// Fast block number lookup. | ||
// ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2 | ||
MOV(32, R(RSCRATCH), PPCSTATE(pc)); | ||
// Keep a copy for later. | ||
MOV(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); | ||
u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMapFallback()); | ||
AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_MASK << 2)); | ||
if (icache <= INT_MAX) | ||
{ | ||
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast<s32>(icache))); | ||
} | ||
else | ||
{ | ||
MOV(64, R(RSCRATCH2), Imm64(icache)); | ||
MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0)); | ||
} | ||
} | ||
|
||
// Check if we found a block. | ||
TEST(64, R(RSCRATCH), R(RSCRATCH)); | ||
FixupBranch not_found = J_CC(CC_Z); | ||
|
||
// Check both block.effectiveAddress and block.msrBits. | ||
// Check block.msrBits. | ||
MOV(32, R(RSCRATCH2), PPCSTATE(msr)); | ||
AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK)); | ||
SHL(64, R(RSCRATCH2), Imm8(32)); | ||
// RSCRATCH_EXTRA still has the PC. | ||
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); | ||
CMP(64, R(RSCRATCH2), | ||
MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, effectiveAddress)))); | ||
|
||
if (m_jit.GetBlockCache()->GetFastBlockMap()) | ||
{ | ||
CMP(32, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, msrBits)))); | ||
} | ||
else | ||
{ | ||
// Also check the block.effectiveAddress | ||
SHL(64, R(RSCRATCH2), Imm8(32)); | ||
// RSCRATCH_EXTRA still has the PC. | ||
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); | ||
CMP(64, R(RSCRATCH2), | ||
MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, effectiveAddress)))); | ||
Comment on lines
+158
to
+159
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not your fault but jeez this is evil and only works because There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. More specifically, I would recommend something like:
This way, any violations will be caught at compile time, and the assert is clear enough that I don't think writing a comment is necessary. |
||
} | ||
|
||
FixupBranch state_mismatch = J_CC(CC_NE); | ||
|
||
// Success; branch to the block we found. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
#include <vector> | ||
|
||
#include "Common/CommonTypes.h" | ||
#include "Core/HW/Memmap.h" | ||
|
||
class JitBase; | ||
|
||
|
@@ -131,8 +132,11 @@ class JitBaseBlockCache | |
// is valid (MSR.IR and MSR.DR, the address translation bits). | ||
static constexpr u32 JIT_CACHE_MSR_MASK = 0x30; | ||
|
||
static constexpr u32 FAST_BLOCK_MAP_ELEMENTS = 0x10000; | ||
static constexpr u32 FAST_BLOCK_MAP_MASK = FAST_BLOCK_MAP_ELEMENTS - 1; | ||
// The value for the map is determined like this: | ||
// ((4 GB guest memory space) / (4 bytes per address)) * sizeof(JitBlock*) | ||
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x2'0000'0000; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You should document how one arrives at this number. You have a guest memory space of 4 GB and PPC instructions are always aligned to 4 bytes, so that gives you 4 GB / 4 bytes = 0x4000'0000 possible entries in the map (I guess it's a lookup table now technically but whatever). Each entry points to an 8-byte JitBlock*, so the total size is 0x4000'0000 * 8 = 0x2'0000'0000 bytes. |
||
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000; | ||
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1; | ||
|
||
explicit JitBaseBlockCache(JitBase& jit); | ||
virtual ~JitBaseBlockCache(); | ||
|
@@ -144,6 +148,7 @@ class JitBaseBlockCache | |
|
||
// Code Cache | ||
JitBlock** GetFastBlockMap(); | ||
JitBlock** GetFastBlockMapFallback(); | ||
void RunOnBlocks(std::function<void(const JitBlock&)> f); | ||
|
||
JitBlock* AllocateBlock(u32 em_address); | ||
|
@@ -203,7 +208,16 @@ class JitBaseBlockCache | |
// It is used to provide a fast way to query if no icache invalidation is needed. | ||
ValidBlockBitSet valid_block; | ||
|
||
// This array is indexed with the masked PC and likely holds the correct block id. | ||
// This array is indexed with the shifted PC and likely holds the correct block id. | ||
// This is used as a fast cache of block_map used in the assembly dispatcher. | ||
std::array<JitBlock*, FAST_BLOCK_MAP_ELEMENTS> fast_block_map{}; // start_addr & mask -> number | ||
// It is implemented via a shm segment using m_block_map_arena. | ||
JitBlock** m_fast_block_map = 0; | ||
Common::MemArena m_block_map_arena; | ||
|
||
// An alternative to the above fast_block_map but without a shm segment | ||
// in case the shm memory region couldn't be allocated. | ||
std::array<JitBlock*, FAST_BLOCK_MAP_FALLBACK_ELEMENTS> | ||
m_fast_block_map_fallback{}; // start_addr & mask -> number | ||
|
||
JitBlock** m_fast_block_map_ptr = 0; | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could also use a comment for how this actually arrives at the correct address, it took me a minute to figure out why
SCALE_2
is correct. (It's because each 4-byte offset of the PC register corresponds to an 8-byte offset in the lookup table, due to 8-byte host pointers.)