Merge pull request #10745 from JosJuice/softmmu
JitArm64: Implement "soft MMU"
JMC47 committed Jul 8, 2022
2 parents 828afc6 + 62ec19c commit 59e8aac
Showing 9 changed files with 248 additions and 110 deletions.
@@ -47,6 +47,8 @@ namespace Memory
// Store the MemArena here
u8* physical_base = nullptr;
u8* logical_base = nullptr;
u8* physical_page_mappings_base = nullptr;
u8* logical_page_mappings_base = nullptr;
static bool is_fastmem_arena_initialized = false;

// The MemArena class
@@ -223,6 +225,9 @@ static std::array<PhysicalMemoryRegion, 4> s_physical_regions;

static std::vector<LogicalMemoryView> logical_mapped_entries;

static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_physical_page_mappings;
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_logical_page_mappings;

void Init()
{
const auto get_mem1_size = [] {
@@ -280,6 +285,8 @@ void Init()
}
g_arena.GrabSHMSegment(mem_size);

s_physical_page_mappings.fill(nullptr);

// Create an anonymous view of the physical memory
for (const PhysicalMemoryRegion& region : s_physical_regions)
{
@@ -295,8 +302,17 @@ void Init()
region.physical_address, region.size);
exit(0);
}

for (u32 i = 0; i < region.size; i += PowerPC::BAT_PAGE_SIZE)
{
const size_t index = (i + region.physical_address) >> PowerPC::BAT_INDEX_SHIFT;
s_physical_page_mappings[index] = *region.out_pointer + i;
}
}

physical_page_mappings_base = reinterpret_cast<u8*>(s_physical_page_mappings.data());
logical_page_mappings_base = reinterpret_cast<u8*>(s_logical_page_mappings.data());

InitMMIO(wii);

Clear();
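
The table filled in above is what makes the "soft MMU" work: one host pointer per BAT-sized page of guest physical address space, or nullptr where no fast mapping exists. A minimal sketch of the lookup this enables (not code from this commit; it assumes the `PowerPC::BAT_INDEX_SHIFT`/`BAT_PAGE_SIZE` constants already used in the loop above):

```cpp
// Hypothetical helper (not in this commit) showing how a guest address resolves
// through one of the page-mapping tables populated above. u8/u32 and the
// PowerPC::BAT_* constants are the ones this file already uses.
inline u8* SoftMmuTranslate(void* const* page_mappings, u32 guest_addr)
{
  // One table slot per BAT-sized page of guest address space.
  void* page = page_mappings[guest_addr >> PowerPC::BAT_INDEX_SHIFT];
  if (page == nullptr)
    return nullptr;  // unmapped (or MMIO): caller must take the slow C++ path

  // Slots hold host pointers to the start of each guest page; the low bits of
  // the guest address are the offset within that page.
  return static_cast<u8*>(page) + (guest_addr & (PowerPC::BAT_PAGE_SIZE - 1));
}
```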
@@ -347,14 +363,14 @@ bool InitFastmemArena()

void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
{
if (!is_fastmem_arena_initialized)
return;

for (auto& entry : logical_mapped_entries)
{
g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
}
logical_mapped_entries.clear();

s_logical_page_mappings.fill(nullptr);

for (u32 i = 0; i < dbat_table.size(); ++i)
{
if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT)
@@ -375,19 +391,27 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
if (intersection_start < intersection_end)
{
// Found an overlapping region; map it.
u32 position = physical_region.shm_position + intersection_start - mapping_address;
u8* base = logical_base + logical_address + intersection_start - translated_address;
u32 mapped_size = intersection_end - intersection_start;

void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
if (!mapped_pointer)
if (is_fastmem_arena_initialized)
{
PanicAlertFmt("Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
intersection_start, mapped_size, logical_address);
exit(0);
u32 position = physical_region.shm_position + intersection_start - mapping_address;
u8* base = logical_base + logical_address + intersection_start - translated_address;
u32 mapped_size = intersection_end - intersection_start;

void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
if (!mapped_pointer)
{
PanicAlertFmt(
"Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
intersection_start, mapped_size, logical_address);
exit(0);
}
logical_mapped_entries.push_back({mapped_pointer, mapped_size});
}
logical_mapped_entries.push_back({mapped_pointer, mapped_size});

s_logical_page_mappings[i] =
*physical_region.out_pointer + intersection_start - mapping_address;
}
}
}
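
A worked example of the entry computed above, under assumed values (Dolphin's 128 KiB BAT granularity, i.e. BAT_INDEX_SHIFT == 17, and a hypothetical DBAT mapping logical 0x80000000 onto physical MEM1 at 0x00000000): for page i = 0x80000000 >> 17 = 0x4000, intersection_start equals mapping_address, so s_logical_page_mappings[0x4000] becomes exactly MEM1's host base pointer. The JIT can then serve accesses to 0x80000000 through the table even when no fastmem arena exists.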
@@ -21,16 +21,18 @@ class Mapping;
namespace Memory
{
// Base is a pointer to the base of the memory map. Yes, some MMU tricks
// are used to set up a full GC or Wii memory map in process memory. on
// 32-bit, you have to mask your offsets with 0x3FFFFFFF. This means that
// some things are mirrored too many times, but eh... it works.

// are used to set up a full GC or Wii memory map in process memory.
// In 64-bit, this might point to "high memory" (above the 32-bit limit),
// so be sure to load it into a 64-bit register.
extern u8* physical_base;
extern u8* logical_base;

// These are guaranteed to point to "low memory" addresses (sub-32-bit).
// This page table is used for a "soft MMU" implementation when
// setting up the full memory map in process memory isn't possible.
extern u8* physical_page_mappings_base;
extern u8* logical_page_mappings_base;

// The actual memory used for backing the memory map.
extern u8* m_pRAM;
extern u8* m_pEXRAM;
extern u8* m_pL1Cache;
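
Given these guarantees, a JIT can keep a single resident base register and swap what it points at. A sketch of that selection (an assumption about usage, not code from this diff; the four pointers are the ones declared above):

```cpp
// Illustrative only: choose the base pointer a JIT keeps resident in a register.
// With a fastmem arena, the bases map the whole guest address space directly;
// without one, they point at the soft-MMU page-mapping tables instead.
inline u8* PickMemBase(bool fastmem_arena, bool data_address_translation)
{
  if (fastmem_arena)
    return data_address_translation ? Memory::logical_base : Memory::physical_base;
  return data_address_translation ? Memory::logical_page_mappings_base :
                                    Memory::physical_page_mappings_base;
}
```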
@@ -215,26 +215,49 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
// Dump a memory range of code
void DumpCode(const u8* start, const u8* end);

// This enum is used for selecting an implementation of EmitBackpatchRoutine.
enum class MemAccessMode
{
// Always calls the slow C++ code. For performance reasons, should generally only be used if
// the guest address is known in advance and IsOptimizableRAMAddress returns false for it.
AlwaysSafe,
// Only emits fast access code. Must only be used if the guest address is known in advance
// and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash!
AlwaysUnsafe,
// Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem_arena):
// Tries to run fast access code, and if that fails, uses backpatching to replace the code
// with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
// then branches to either the fast access code or the slow C++ code.
Auto,
};
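
The enum's comments imply a simple decision rule at call sites; as a hedged sketch (the helper and its parameters are hypothetical, but the rule follows the documentation above):

```cpp
// Hypothetical call-site logic following the enum documentation above.
MemAccessMode ChooseMode(bool address_known_in_advance, bool is_optimizable_ram)
{
  if (!address_known_in_advance)
    return MemAccessMode::Auto;  // let EmitBackpatchRoutine handle both paths
  // With a known address we can commit to one path up front.
  return is_optimizable_ram ? MemAccessMode::AlwaysUnsafe : MemAccessMode::AlwaysSafe;
}
```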

// This is the core routine for accessing emulated memory, with support for
// many different kinds of loads and stores as well as fastmem backpatching.
// many different kinds of loads and stores as well as fastmem/backpatching.
//
// Registers used:
//
// addr scratch
// Store: X1 X0
// Load: X0
// Zero 256: X0 X30
// Store float: X1 Q0
// Load float: X0
//
// If fastmem && !do_farcode, the addr argument can be any register.
// If mode == AlwaysUnsafe, the addr argument can be any register.
// Otherwise it must be the register listed in the table above.
//
// Additional scratch registers are used in the following situations:
// fastmem && do_farcode && emitting_routine: X2
// fastmem && do_farcode && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0
// fastmem && do_farcode && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3
// !fastmem || do_farcode: X30 (plus lots more unless you set gprs_to_push and fprs_to_push)
void EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, Arm64Gen::ARM64Reg RS,
//
// emitting_routine && mode == Auto: X2
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
// emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3
// mode != AlwaysSafe && !jo.fastmem_arena: X2
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
// !emitting_routine && mode == Auto && jo.fastmem_arena: X30
//
// Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
// may be clobbered if mode != AlwaysUnsafe.
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
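
For illustration, a call matching the register table above: a 32-bit integer load with the address in (W/X)0, as required for loads. The flags come from BackPatchInfo as used elsewhere in this diff; dest_reg and the in-use register sets are hypothetical:

```cpp
// Hypothetical call site: emit a 32-bit guest load into dest_reg.
// Per the table above, the address for a load must be in register 0.
const u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, ARM64Reg::W0,
                     gprs_in_use, fprs_in_use);
```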

@@ -54,19 +54,44 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
ERROR_LOG_FMT(DYNA_REC, "Full block: {}", pc_memory);
}

void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, ARM64Reg RS,
ARM64Reg addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push,
void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
BitSet32 gprs_to_push, BitSet32 fprs_to_push,
bool emitting_routine)
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);

const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;

bool in_far_code = false;
const u8* fastmem_start = GetCodePtr();
std::optional<FixupBranch> slowmem_fixup;

if (fastmem)
if (emit_fastmem)
{
if (do_farcode && emitting_routine)
ARM64Reg memory_base = MEM_REG;
ARM64Reg memory_offset = addr;

if (!jo.fastmem_arena)
{
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;

memory_base = EncodeRegTo64(temp);
memory_offset = ARM64Reg::W2;

LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
LDR(memory_base, MEM_REG, ArithOption(temp, true));

if (emit_slowmem)
{
FixupBranch pass = CBNZ(memory_base);
slowmem_fixup = B();
SetJumpTarget(pass);
}

AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 64));
}
else if (emit_slowmem && emitting_routine)
{
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
const ARM64Reg temp2 = ARM64Reg::W2;
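
The `!jo.fastmem_arena` block above is the JIT-compiled form of the soft-MMU lookup: LSR computes the BAT page index, LDR fetches the 64-bit host page pointer from the table MEM_REG points at in this configuration (a scaled register offset, hence `ArithOption(temp, true)`), a null entry falls through CBNZ into the unconditional branch recorded in slowmem_fixup, and AND extracts the offset within the page. The fast access then goes through memory_base/memory_offset instead of MEM_REG/addr, which is why every load and store in the rest of this function is rewritten below.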
@@ -79,11 +104,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
ARM64Reg temp = ARM64Reg::D0;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);

m_float_emit.STR(access_size, temp, MEM_REG, addr);
m_float_emit.STR(access_size, temp, memory_base, memory_offset);
}
else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
{
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr);
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), memory_base, memory_offset);

ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
true, false);
@@ -94,44 +119,44 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);

if (flags & BackPatchInfo::FLAG_SIZE_32)
STR(temp, MEM_REG, addr);
STR(temp, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
STRH(temp, MEM_REG, addr);
STRH(temp, memory_base, memory_offset);
else
STRB(temp, MEM_REG, addr);
STRB(temp, memory_base, memory_offset);
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
// This literally only stores 32 bytes of zeros to the target address
ARM64Reg temp = ARM64Reg::X30;
ADD(temp, addr, MEM_REG);
ADD(temp, memory_base, memory_offset);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
LDR(RS, MEM_REG, addr);
LDR(RS, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
LDRH(RS, MEM_REG, addr);
LDRH(RS, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
LDRB(RS, MEM_REG, addr);
LDRB(RS, memory_base, memory_offset);

ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
}
}
const u8* fastmem_end = GetCodePtr();

if (!fastmem || do_farcode)
if (emit_slowmem)
{
const bool memcheck = jo.memcheck && !emitting_routine;

if (fastmem && do_farcode)
if (emit_fastmem)
{
in_far_code = true;
SwitchToFarCode();

if (!emitting_routine)
if (jo.fastmem_arena && !emitting_routine)
{
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
fastmem_area->fastmem_code = fastmem_start;
@@ -261,7 +286,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR

if (in_far_code)
{
if (emitting_routine)
if (slowmem_fixup)
{
FixupBranch done = B();
SwitchToNearCode();

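Note the changed condition above: previously only routines emitted a skip-over branch at this point, but with the soft MMU a slowmem_fixup branch can also exist when jo.fastmem_arena is false, so the join-up code now keys off whether that branch was actually created.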