@@ -9,6 +9,7 @@
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MathUtil.h"
#include "Common/MsgHandler.h"
#include "Common/PerformanceCounter.h"
#include "Common/StringUtil.h"

@@ -45,7 +46,7 @@ void JitArm64::Init()
{
const size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE;
AllocCodeSpace(CODE_SIZE + child_code_size);
AddChildCodeSpace(&farcode, child_code_size);
AddChildCodeSpace(&m_far_code, child_code_size);

jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena();
jo.enableBlocklink = true;
@@ -68,6 +69,8 @@ void JitArm64::Init()

AllocStack();
GenerateAsm();

ResetFreeMemoryRanges();
}

bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
@@ -123,15 +126,26 @@ bool JitArm64::HandleStackFault()
// Throws away all generated code and its bookkeeping: clears the fastmem fault/handler maps and
// the block cache (including its pending ranges-to-free lists), resets the near and far code
// spaces, refreshes the memory/exception options, regenerates the asm routines through
// GenerateAsm() and finally rebuilds the free-range bookkeeping.
// NOTE(review): this body references both `farcode` and `m_far_code`, and still clears
// `m_handler_to_loc` — this looks like leftover lines from a rename/removal; confirm which
// statements are current.
void JitArm64::ClearCache()
{
m_fault_to_handler.clear();
m_handler_to_loc.clear();

blocks.Clear();
blocks.ClearRangesToFree();
// Scoped guard kept alive for the rest of the function; judging by its name it makes the JIT
// pages writable while the code spaces are reset and refilled — confirm against its definition.
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
ClearCodeSpace();
farcode.ClearCodeSpace();
m_far_code.ClearCodeSpace();
UpdateMemoryAndExceptionOptions();

GenerateAsm();

// Done last so the free ranges start at the write pointers as they stand after GenerateAsm().
ResetFreeMemoryRanges();
}

// Rebuilds the free-range bookkeeping for both code regions from scratch: afterwards each
// rangeset holds exactly one range, running from the emitter's current write pointer to the
// end of its region.
void JitArm64::ResetFreeMemoryRanges()
{
  // Forget whatever was tracked before.
  m_free_ranges_near.clear();
  m_free_ranges_far.clear();

  // Near region: everything not yet written counts as unused.
  auto* const near_begin = GetWritableCodePtr();
  auto* const near_limit = GetWritableCodeEnd();
  m_free_ranges_near.insert(near_begin, near_limit);

  // Far region: same treatment, using the far code block's own pointers.
  auto* const far_begin = m_far_code.GetWritableCodePtr();
  auto* const far_limit = m_far_code.GetWritableCodeEnd();
  m_free_ranges_far.insert(far_begin, far_limit);
}

void JitArm64::Shutdown()
@@ -577,7 +591,12 @@ void JitArm64::SingleStep()
pExecAddr();
}

void JitArm64::Jit(u32)
// Public entry point for compiling the block at `em_address`. Forwards to the two-argument
// overload with the clear-cache-and-retry-on-failure behavior enabled.
void JitArm64::Jit(u32 em_address)
{
  constexpr bool allow_cache_clear_and_retry = true;
  Jit(em_address, allow_cache_clear_and_retry);
}

void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
{
if (m_cleanup_after_stackfault)
{
@@ -589,14 +608,31 @@ void JitArm64::Jit(u32)
#endif
}

if (IsAlmostFull() || farcode.IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache)
{
if (SConfig::GetInstance().bJITNoBlockCache)
ClearCache();

// Check if any code blocks have been freed in the block cache and transfer this information to
// the local rangesets to allow overwriting them with new code.
for (auto range : blocks.GetRangesToFreeNear())
{
auto first_fastmem_area = m_fault_to_handler.upper_bound(range.first);
auto last_fastmem_area = first_fastmem_area;
auto end = m_fault_to_handler.end();
while (last_fastmem_area != end && last_fastmem_area->first <= range.second)
++last_fastmem_area;
m_fault_to_handler.erase(first_fastmem_area, last_fastmem_area);

m_free_ranges_near.insert(range.first, range.second);
}
for (auto range : blocks.GetRangesToFreeFar())
{
m_free_ranges_far.insert(range.first, range.second);
}
blocks.ClearRangesToFree();

const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;

std::size_t block_size = m_code_buffer.size();
const u32 em_address = PowerPC::ppcState.pc;

if (SConfig::GetInstance().bEnableDebugging)
{
@@ -619,12 +655,75 @@ void JitArm64::Jit(u32)
return;
}

JitBlock* b = blocks.AllocateBlock(em_address);
DoJit(em_address, b, nextPC);
blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
if (SetEmitterStateToFreeCodeRegion())
{
u8* near_start = GetWritableCodePtr();
u8* far_start = m_far_code.GetWritableCodePtr();

JitBlock* b = blocks.AllocateBlock(em_address);
if (DoJit(em_address, b, nextPC))
{
// Code generation succeeded.

// Mark the memory regions that this code block uses as used in the local rangesets.
u8* near_end = GetWritableCodePtr();
if (near_start != near_end)
m_free_ranges_near.erase(near_start, near_end);
u8* far_end = m_far_code.GetWritableCodePtr();
if (far_start != far_end)
m_free_ranges_far.erase(far_start, far_end);

// Store the used memory regions in the block so we know what to mark as unused when the
// block gets invalidated.
b->near_begin = near_start;
b->near_end = near_end;
b->far_begin = far_start;
b->far_end = far_end;

blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
return;
}
}

if (clear_cache_and_retry_on_failure)
{
// Code generation failed due to not enough free space in either the near or far code regions.
// Clear the entire JIT cache and retry.
WARN_LOG(POWERPC, "flushing code caches, please report if this happens a lot");
ClearCache();
Jit(em_address, false);
return;
}

PanicAlertT("JIT failed to find code space after a cache clear. This should never happen. Please "
"report this incident on the bug tracker. Dolphin will now exit.");
exit(-1);
}

// Points the near and far code emitters at the first entry of the size-ordered view of their
// respective free rangesets (intended to be the largest free block of each region).
//
// Returns true when both emitters were retargeted. Returns false when either rangeset is
// empty, which the caller treats as "out of code space" and answers with a JIT cache clear.
// Note that the near emitter has already been retargeted by the time the far lookup can fail.
bool JitArm64::SetEmitterStateToFreeCodeRegion()
{
  // Near code region.
  auto near_candidate = m_free_ranges_near.by_size_begin();
  const bool near_exhausted = near_candidate == m_free_ranges_near.by_size_end();
  if (near_exhausted)
  {
    WARN_LOG(POWERPC, "Failed to find free memory region in near code region.");
    return false;
  }
  SetCodePtr(near_candidate.from(), near_candidate.to());

  // Far code region.
  auto far_candidate = m_free_ranges_far.by_size_begin();
  const bool far_exhausted = far_candidate == m_free_ranges_far.by_size_end();
  if (far_exhausted)
  {
    WARN_LOG(POWERPC, "Failed to find free memory region in far code region.");
    return false;
  }
  m_far_code.SetCodePtr(far_candidate.from(), far_candidate.to());

  return true;
}

void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
{
js.isLastInstruction = false;
js.firstFPInstructionFound = false;
@@ -871,9 +970,21 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
WriteExit(nextPC);
}

if (HasWriteFailed() || m_far_code.HasWriteFailed())
{
if (HasWriteFailed())
WARN_LOG(POWERPC, "JIT ran out of space in near code region during code generation.");
if (m_far_code.HasWriteFailed())
WARN_LOG(POWERPC, "JIT ran out of space in far code region during code generation.");

return false;
}

b->codeSize = (u32)(GetCodePtr() - start);
b->originalSize = code_block.m_num_instructions;

FlushIcache();
farcode.FlushIcache();
m_far_code.FlushIcache();

return true;
}
@@ -7,6 +7,8 @@
#include <map>
#include <tuple>

#include <rangeset/rangesizeset.h>

#include "Common/Arm64Emitter.h"

#include "Core/PowerPC/CPUCoreBase.h"
@@ -39,7 +41,8 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
void Run() override;
void SingleStep() override;

void Jit(u32) override;
void Jit(u32 em_address) override;
void Jit(u32 em_address, bool clear_cache_and_retry_on_failure);

const char* GetName() const override { return "JITARM64"; }

@@ -178,21 +181,6 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
bool IsFPRStoreSafe(size_t guest_reg) const;

protected:
// Describes the configuration of one slowmem handler: the data and address registers, the
// GPR/FPR sets to push around the call, and the access flags. Used as the key of
// m_handler_to_loc, hence the strict ordering below.
struct SlowmemHandler
{
  Arm64Gen::ARM64Reg dest_reg;
  Arm64Gen::ARM64Reg addr_reg;
  BitSet32 gprs;
  BitSet32 fprs;
  u32 flags;

  // Lexicographic comparison over (dest_reg, addr_reg, gprs, fprs, flags), in that order.
  bool operator<(const SlowmemHandler& rhs) const
  {
    const auto lhs_key = std::tie(dest_reg, addr_reg, gprs, fprs, flags);
    const auto rhs_key = std::tie(rhs.dest_reg, rhs.addr_reg, rhs.gprs, rhs.fprs, rhs.flags);
    return lhs_key < rhs_key;
  }
};

struct FastmemArea
{
const u8* fastmem_code;
@@ -206,20 +194,23 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
// Simple functions to switch between near and far code emitting
// Redirects code emission to the far code region. The near emitter state (write pointer, end
// pointer and write-failed flag) is backed up so SwitchToNearCode() can restore it; this
// emitter then takes over the far code block's state, aligns with AlignCode16() and records
// that far-code emission is active.
// NOTE(review): the body holds both the `nearcode`/`farcode`/`m_in_farcode` statements and
// their `m_near_code`/`m_far_code`/`m_in_far_code` counterparts — this looks like leftover
// lines from a rename; confirm which set is current.
void SwitchToFarCode()
{
nearcode = GetWritableCodePtr();
SetCodePtrUnsafe(farcode.GetWritableCodePtr());
m_near_code = GetWritableCodePtr();
m_near_code_end = GetWritableCodeEnd();
m_near_code_write_failed = HasWriteFailed();
SetCodePtrUnsafe(m_far_code.GetWritableCodePtr(), m_far_code.GetWritableCodeEnd(),
m_far_code.HasWriteFailed());
AlignCode16();
m_in_farcode = true;
m_in_far_code = true;
}

// Returns code emission to the near code region: the current (far) emitter state is handed
// back to the far code block, and the near state backed up by SwitchToFarCode() is restored.
// NOTE(review): the body holds both the `nearcode`/`farcode`/`m_in_farcode` statements and
// their `m_near_code`/`m_far_code`/`m_in_far_code` counterparts — this looks like leftover
// lines from a rename; confirm which set is current.
void SwitchToNearCode()
{
farcode.SetCodePtrUnsafe(GetWritableCodePtr());
SetCodePtrUnsafe(nearcode);
m_in_farcode = false;
m_far_code.SetCodePtrUnsafe(GetWritableCodePtr(), GetWritableCodeEnd(), HasWriteFailed());
SetCodePtrUnsafe(m_near_code, m_near_code_end, m_near_code_write_failed);
m_in_far_code = false;
}

// Reports whether emission is currently redirected to the far code region, i.e. the flag set
// by SwitchToFarCode() and cleared by SwitchToNearCode().
// NOTE(review): two definitions appear here (old and new flag name) — confirm which is current.
bool IsInFarCode() const { return m_in_farcode; }
bool IsInFarCode() const { return m_in_far_code; }

// Dump a memory range of code
void DumpCode(const u8* start, const u8* end);
@@ -238,14 +229,20 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
Arm64Gen::FixupBranch CheckIfSafeAddress(Arm64Gen::ARM64Reg addr, Arm64Gen::ARM64Reg tmp1,
Arm64Gen::ARM64Reg tmp2);

void DoJit(u32 em_address, JitBlock* b, u32 nextPC);
bool DoJit(u32 em_address, JitBlock* b, u32 nextPC);

// Finds a free memory region and sets the near and far code emitters to point at that region.
// Returns false if no free memory region can be found for either of the two.
bool SetEmitterStateToFreeCodeRegion();

void DoDownCount();
void Cleanup();
void ResetStack();
void AllocStack();
void FreeStack();

void ResetFreeMemoryRanges();

// AsmRoutines
void GenerateAsm();
void GenerateCommonAsm();
@@ -296,21 +293,27 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA

// <Fastmem fault location, slowmem handler location>
std::map<const u8*, FastmemArea> m_fault_to_handler;
std::map<SlowmemHandler, const u8*> m_handler_to_loc;
Arm64GPRCache gpr;
Arm64FPRCache fpr;

JitArm64BlockCache blocks{*this};

Arm64Gen::ARM64FloatEmitter m_float_emit;

Arm64Gen::ARM64CodeBlock farcode;
u8* nearcode; // Backed up when we switch to far code.
bool m_in_farcode = false;
Arm64Gen::ARM64CodeBlock m_far_code;
bool m_in_far_code = false;

// Backed up when we switch to far code.
u8* m_near_code;
u8* m_near_code_end;
bool m_near_code_write_failed;

bool m_enable_blr_optimization;
bool m_cleanup_after_stackfault = false;
u8* m_stack_base = nullptr;
u8* m_stack_pointer = nullptr;
u8* m_saved_stack_pointer = nullptr;

HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near;
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far;
};
@@ -12,6 +12,12 @@ JitArm64BlockCache::JitArm64BlockCache(JitBase& jit) : JitBaseBlockCache{jit}
{
}

// Initializes the base block cache, then empties both pending ranges-to-free lists so no
// stale code ranges are carried over.
void JitArm64BlockCache::Init()
{
JitBaseBlockCache::Init();
ClearRangesToFree();
}

void JitArm64BlockCache::WriteLinkBlock(Arm64Gen::ARM64XEmitter& emit,
const JitBlock::LinkData& source, const JitBlock* dest)
{
@@ -60,7 +66,7 @@ void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const
{
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
u8* location = source.exitPtrs;
ARM64XEmitter emit(location);
ARM64XEmitter emit(location, location + 12);

WriteLinkBlock(emit, source, dest);
emit.FlushIcache();
@@ -69,9 +75,35 @@ void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const
// Overwrites the entry points of `block` with BRK(0x123) instructions, starting at
// checkedEntry and continuing up to and including normalEntry, then calls FlushIcache() on
// the temporary emitter.
// NOTE(review): `emit` is declared twice below (one-argument and two-argument constructor
// forms) — this looks like leftover from a constructor signature change; confirm which is
// current.
void JitArm64BlockCache::WriteDestroyBlock(const JitBlock& block)
{
// Only clear the entry points as we might still be within this block.
ARM64XEmitter emit(block.checkedEntry);
// The "+ 4" presumably extends the writable range past the instruction at normalEntry
// (4 bytes per instruction) — confirm.
ARM64XEmitter emit(block.checkedEntry, block.normalEntry + 4);
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
while (emit.GetWritableCodePtr() <= block.normalEntry)
emit.BRK(0x123);
emit.FlushIcache();
}

// Destroys `block` through the base class and then queues the near/far code ranges it
// occupied, so that the memory can be handed back for reuse on the next code generation.
// Empty ranges (begin == end) are not queued.
void JitArm64BlockCache::DestroyBlock(JitBlock& block)
{
  JitBaseBlockCache::DestroyBlock(block);

  const bool occupies_near_code = block.near_begin != block.near_end;
  const bool occupies_far_code = block.far_begin != block.far_end;

  if (occupies_near_code)
  {
    m_ranges_to_free_on_next_codegen_near.emplace_back(block.near_begin, block.near_end);
  }

  if (occupies_far_code)
  {
    m_ranges_to_free_on_next_codegen_far.emplace_back(block.far_begin, block.far_end);
  }
}

// Returns the near-code (begin, end) pointer pairs queued by DestroyBlock() since the last
// ClearRangesToFree(). The reference is to the cache's own list; no copy is made.
const std::vector<std::pair<u8*, u8*>>& JitArm64BlockCache::GetRangesToFreeNear() const
{
return m_ranges_to_free_on_next_codegen_near;
}

// Returns the far-code (begin, end) pointer pairs queued by DestroyBlock() since the last
// ClearRangesToFree(). The reference is to the cache's own list; no copy is made.
const std::vector<std::pair<u8*, u8*>>& JitArm64BlockCache::GetRangesToFreeFar() const
{
return m_ranges_to_free_on_next_codegen_far;
}

// Empties both lists of code ranges awaiting release (near and far).
void JitArm64BlockCache::ClearRangesToFree()
{
m_ranges_to_free_on_next_codegen_near.clear();
m_ranges_to_free_on_next_codegen_far.clear();
}
@@ -3,6 +3,8 @@

#pragma once

#include <vector>

#include "Common/Arm64Emitter.h"
#include "Core/PowerPC/JitCommon/JitCache.h"

@@ -15,10 +17,22 @@ class JitArm64BlockCache : public JitBaseBlockCache
public:
explicit JitArm64BlockCache(JitBase& jit);

void Init() override;

void DestroyBlock(JitBlock& block) override;

const std::vector<std::pair<u8*, u8*>>& GetRangesToFreeNear() const;
const std::vector<std::pair<u8*, u8*>>& GetRangesToFreeFar() const;

void ClearRangesToFree();

void WriteLinkBlock(Arm64Gen::ARM64XEmitter& emit, const JitBlock::LinkData& source,
const JitBlock* dest = nullptr);

private:
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
void WriteDestroyBlock(const JitBlock& block) override;

std::vector<std::pair<u8*, u8*>> m_ranges_to_free_on_next_codegen_near;
std::vector<std::pair<u8*, u8*>> m_ranges_to_free_on_next_codegen_far;
};
@@ -122,39 +122,14 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
{
if (fastmem && do_farcode)
{
if (emitting_routine)
{
in_far_code = true;
SwitchToFarCode();
}
else
{
SlowmemHandler handler;
handler.dest_reg = RS;
handler.addr_reg = addr;
handler.gprs = gprs_to_push;
handler.fprs = fprs_to_push;
handler.flags = flags;
in_far_code = true;
SwitchToFarCode();

if (!emitting_routine)
{
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
auto handler_loc_iter = m_handler_to_loc.find(handler);

if (handler_loc_iter == m_handler_to_loc.end())
{
in_far_code = true;
SwitchToFarCode();
const u8* handler_loc = GetCodePtr();
m_handler_to_loc[handler] = handler_loc;
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = handler_loc;
}
else
{
const u8* handler_loc = handler_loc_iter->second;
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = handler_loc;
return;
}
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = GetCodePtr();
}
}

@@ -294,7 +269,7 @@ bool JitArm64::HandleFastmemFault(uintptr_t access_address, SContext* ctx)
return false;

const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
ARM64XEmitter emitter(const_cast<u8*>(fastmem_area_start));
ARM64XEmitter emitter(const_cast<u8*>(fastmem_area_start), const_cast<u8*>(fastmem_area_end));

emitter.BL(slow_handler_iter->second.slowmem_code);

@@ -35,7 +35,7 @@ class TestConversion : private JitArm64
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;

AllocCodeSpace(4096);
AddChildCodeSpace(&farcode, 2048);
AddChildCodeSpace(&m_far_code, 2048);

gpr.Init(this);
fpr.Init(this);