Skip to content

Commit

Permalink
Merge pull request #114 from delroth/mmio-optimization
Browse files Browse the repository at this point in the history
MMIO reads JIT optimization
  • Loading branch information
delroth committed Mar 8, 2014
2 parents 886060a + 8802770 commit 1fbfc49
Show file tree
Hide file tree
Showing 7 changed files with 235 additions and 11 deletions.
48 changes: 48 additions & 0 deletions Source/Core/Common/x64ABI.cpp
Expand Up @@ -176,6 +176,14 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
ABI_RestoreStack(2 * 4);
}

void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) {
ABI_AlignStack(2 * 4);
PUSH(32, Imm32((u32)param2));
PUSH(32, Imm32(param1));
CALL(func);
ABI_RestoreStack(2 * 4);
}

void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) {
ABI_AlignStack(3 * 4);
PUSH(32, Imm32(param3));
Expand Down Expand Up @@ -204,6 +212,14 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2,u32 param
ABI_RestoreStack(4 * 4);
}

// Emits a call to func(param1, param2) with both arguments passed on the
// stack: a pointer first argument (pushed as a 32-bit immediate) and a
// constant second argument.
void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) {
	// Only two 4-byte arguments are pushed, so the frame size given to
	// ABI_AlignStack/ABI_RestoreStack has to be 2 * 4, exactly like the other
	// two-argument helpers (ABI_CallFunctionCC, ABI_CallFunctionCP). Passing
	// the three-argument size would describe 4 bytes that are never pushed.
	ABI_AlignStack(2 * 4);
	// Arguments are pushed last-to-first.
	PUSH(32, Imm32(param2));
	PUSH(32, Imm32((u32)param1));
	CALL(func);
	ABI_RestoreStack(2 * 4);
}

void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2,u32 param3) {
ABI_AlignStack(3 * 4);
PUSH(32, Imm32(param3));
Expand Down Expand Up @@ -344,6 +360,22 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
ABI_RestoreStack(0);
}

void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) {
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(64, R(ABI_PARAM2), Imm64((u64)param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), Imm64((u64)func));
CALLptr(R(RAX));
} else {
CALL(func);
}
ABI_RestoreStack(0);
}

void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) {
ABI_AlignStack(0);
MOV(32, R(ABI_PARAM1), Imm32(param1));
Expand Down Expand Up @@ -396,6 +428,22 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 para
ABI_RestoreStack(0);
}

void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) {
ABI_AlignStack(0);
MOV(64, R(ABI_PARAM1), Imm64((u64)param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), Imm64((u64)func));
CALLptr(R(RAX));
} else {
CALL(func);
}
ABI_RestoreStack(0);
}

void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 param3) {
ABI_AlignStack(0);
MOV(64, R(ABI_PARAM1), Imm64((u64)param1));
Expand Down
29 changes: 26 additions & 3 deletions Source/Core/Common/x64Emitter.h
Expand Up @@ -8,6 +8,7 @@

#include <cstddef>
#include <cstring>
#include <functional>

#include "Common/Common.h"
#include "Common/MemoryUtil.h"
Expand Down Expand Up @@ -171,7 +172,7 @@ struct OpArg
u16 indexReg;
};

inline OpArg M(void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}
inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
inline OpArg MDisp(X64Reg value, int offset) {
Expand All @@ -194,9 +195,9 @@ inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used
inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
#ifdef _ARCH_64
inline OpArg ImmPtr(void* imm) {return Imm64((u64)imm);}
inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}
#else
inline OpArg ImmPtr(void* imm) {return Imm32((u32)imm);}
inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);}
#endif
inline u32 PtrOffset(void* ptr, void* base) {
#ifdef _ARCH_64
Expand Down Expand Up @@ -671,9 +672,11 @@ class XEmitter
// These will destroy the 1 or 2 first "parameter regs".
void ABI_CallFunctionC(void *func, u32 param1);
void ABI_CallFunctionCC(void *func, u32 param1, u32 param2);
void ABI_CallFunctionCP(void *func, u32 param1, void *param2);
void ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3);
void ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3);
void ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2,u32 param3, void *param4);
void ABI_CallFunctionPC(void *func, void *param1, u32 param2);
void ABI_CallFunctionPPC(void *func, void *param1, void *param2,u32 param3);
void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
void ABI_CallFunctionA(void *func, const Gen::OpArg &arg1);
Expand Down Expand Up @@ -737,6 +740,26 @@ class XEmitter
#define DECLARE_IMPORT(x) extern "C" void *__imp_##x

#endif

// Utility to generate a call to a std::function object.
//
// Unfortunately, calling operator() directly is undefined behavior in C++
// (this method might be a thunk in the case of multi-inheritance) so we
// have to go through a trampoline function.
//
// The trampoline returns whatever the std::function returns. Generated code
// that calls through it reads the result from the return register, so the
// value must actually be returned rather than dropped. For T = void the
// return statement simply forwards the void expression.
//
// f:    the std::function to invoke; must not be null.
// args: arguments forwarded by value to *f.
template <typename T, typename... Args>
static T CallLambdaTrampoline(const std::function<T(Args...)>* f,
                              Args... args)
{
	return (*f)(args...);
}

// Emits a call that invokes *f with the constant argument p1, going through
// the matching CallLambdaTrampoline instantiation. The std::function's
// address is passed as the first argument of the emitted call and p1 as the
// second.
template <typename T, typename... Args>
void ABI_CallLambdaC(const std::function<T(Args...)>* f, u32 p1)
{
	// Double casting is required by VC++ for some reason.
	auto thunk = (void(*)())&XEmitter::CallLambdaTrampoline<T, Args...>;
	void* const thunk_address = (void*)thunk;

	// The emitter helper takes a non-const void*; the trampoline only ever
	// sees the object through a pointer-to-const.
	void* const function_object = const_cast<void*>((const void*)f);

	ABI_CallFunctionPC(thunk_address, function_object, p1);
}
}; // class XEmitter


Expand Down
12 changes: 6 additions & 6 deletions Source/Core/Core/HW/MMIO.cpp
Expand Up @@ -148,12 +148,12 @@ class ComplexHandlingMethod : public ReadHandlingMethod<T>,

virtual void AcceptReadVisitor(ReadHandlingMethodVisitor<T>& v) const
{
v.VisitComplex(read_lambda_);
v.VisitComplex(&read_lambda_);
}

virtual void AcceptWriteVisitor(WriteHandlingMethodVisitor<T>& v) const
{
v.VisitComplex(write_lambda_);
v.VisitComplex(&write_lambda_);
}

private:
Expand Down Expand Up @@ -313,9 +313,9 @@ void ReadHandler<T>::ResetMethod(ReadHandlingMethod<T>* method)
ret = [addr, mask](u32) { return *addr & mask; };
}

virtual void VisitComplex(std::function<T(u32)> lambda)
virtual void VisitComplex(const std::function<T(u32)>* lambda)
{
ret = lambda;
ret = *lambda;
}
};

Expand Down Expand Up @@ -367,9 +367,9 @@ void WriteHandler<T>::ResetMethod(WriteHandlingMethod<T>* method)
ret = [ptr, mask](u32, T val) { *ptr = val & mask; };
}

virtual void VisitComplex(std::function<void(u32, T)> lambda)
virtual void VisitComplex(const std::function<void(u32, T)>* lambda)
{
ret = lambda;
ret = *lambda;
}
};

Expand Down
13 changes: 13 additions & 0 deletions Source/Core/Core/HW/MMIO.h
Expand Up @@ -31,6 +31,19 @@ enum Block
const u32 BLOCK_SIZE = 0x10000;
const u32 NUM_MMIOS = NUM_BLOCKS * BLOCK_SIZE;

// Tells whether a physical memory address belongs to the MMIO address range.
// In practice, most games use a virtual memory mapping (via BATs set in the
// IPL) that matches the physical memory mapping for MMIOs.
//
// FIFO writes are deliberately excluded: they are handled via a different
// mechanism and should not go through the normal MMIO access interface.
inline bool IsMMIOAddress(u32 address)
{
	// The top three address bits select the range.
	const bool in_mmio_range = (address & 0xE0000000) == 0xC0000000;
	// An address whose low 16 bits are 0x8000 is the FIFO exception.
	const bool is_fifo_address = (address & 0x0000FFFF) == 0x00008000;

	return in_mmio_range && !is_fifo_address;
}

// Compute the internal unique ID for a given MMIO address. This ID is computed
// from a very simple formula: (block_id << 16) | lower_16_bits(address).
//
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/Core/HW/MMIOHandlers.h
Expand Up @@ -88,15 +88,15 @@ class ReadHandlingMethodVisitor
public:
virtual void VisitConstant(T value) = 0;
virtual void VisitDirect(const T* addr, u32 mask) = 0;
virtual void VisitComplex(std::function<T(u32)> lambda) = 0;
virtual void VisitComplex(const std::function<T(u32)>* lambda) = 0;
};
template <typename T>
class WriteHandlingMethodVisitor
{
public:
virtual void VisitNop() = 0;
virtual void VisitDirect(T* addr, u32 mask) = 0;
virtual void VisitComplex(std::function<void(u32, T)> lambda) = 0;
virtual void VisitComplex(const std::function<void(u32, T)>* lambda) = 0;
};

// These classes are INTERNAL. Do not use outside of the MMIO implementation
Expand Down
133 changes: 133 additions & 0 deletions Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
Expand Up @@ -6,6 +6,8 @@

#include "Common/Common.h"
#include "Common/CPUDetect.h"

#include "Core/HW/MMIO.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"
#include "Core/PowerPC/JitCommon/JitBase.h"

Expand Down Expand Up @@ -118,6 +120,122 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
return result;
}

// Visitor that generates code to read a MMIO value into a destination
// register. EAX is used as a scratch register for direct memory reads and is
// where the result of a lambda call is picked up, so it is overwritten by
// those code paths.
//
// T is the access type (its size gives the width of the read). The value is
// zero extended or sign extended to 32 bits depending on sign_extend.
template <typename T>
class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>
{
public:
	// code:             code block the instructions are emitted into.
	// registers_in_use: register set saved/restored around a lambda call.
	// dst_reg:          register receiving the value that was read.
	// address:          MMIO address being read; passed to complex handlers.
	// sign_extend:      sign extend (true) or zero extend (false) the value.
	MMIOReadCodeGenerator(Gen::XCodeBlock* code, u32 registers_in_use,
	                      Gen::X64Reg dst_reg, u32 address, bool sign_extend)
		: m_code(code), m_registers_in_use(registers_in_use), m_dst_reg(dst_reg),
		  m_address(address), m_sign_extend(sign_extend)
	{
	}

	// Constant handler: the value is emitted as an immediate.
	virtual void VisitConstant(T value)
	{
		LoadConstantToReg(8 * sizeof (T), value);
	}
	// Direct handler: the value is loaded from addr and masked.
	virtual void VisitDirect(const T* addr, u32 mask)
	{
		LoadAddrMaskToReg(8 * sizeof (T), addr, mask);
	}
	// Complex handler: a call to the lambda is emitted.
	virtual void VisitComplex(const std::function<T(u32)>* lambda)
	{
		CallLambda(8 * sizeof (T), lambda);
	}

private:
	// Generates code to load a constant to the destination register. In
	// practice it would be better to avoid using a register for this, but it
	// would require refactoring a lot of JIT code.
	void LoadConstantToReg(int sbits, u32 value)
	{
		// A 32-bit source already fills the 32-bit destination, so there is
		// nothing to extend. Skipping that case also avoids shifting a 32-bit
		// value by 32 bits, which is undefined behavior.
		if (m_sign_extend && sbits < 32)
		{
			const u32 sign_bit = 1u << (sbits - 1);
			if (value & sign_bit)
				value |= 0xFFFFFFFFu << sbits;
		}
		m_code->MOV(32, R(m_dst_reg), Gen::Imm32(value));
	}

	// Generate the proper MOV instruction depending on whether the read should
	// be sign extended or zero extended.
	void MoveOpArgToReg(int sbits, Gen::OpArg arg)
	{
		if (m_sign_extend)
			m_code->MOVSX(32, sbits, m_dst_reg, arg);
		else
			m_code->MOVZX(32, sbits, m_dst_reg, arg);
	}

	// Generates code to load the value stored at ptr, apply mask to it and
	// place the result in the destination register. EAX holds the pointer.
	void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
	{
#ifdef _ARCH_64
		m_code->MOV(64, R(EAX), ImmPtr(ptr));
#else
		m_code->MOV(32, R(EAX), ImmPtr(ptr));
#endif
		// If we do not need to mask, we can do the sign extend while loading
		// from memory. If masking is required, we have to first zero extend,
		// then mask, then sign extend if needed (1 instr vs. 2/3).
		u32 all_ones = (1ULL << sbits) - 1;
		if ((all_ones & mask) == all_ones)
			MoveOpArgToReg(sbits, MDisp(EAX, 0));
		else
		{
			m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0));
			m_code->AND(32, R(m_dst_reg), Imm32(mask));
			if (m_sign_extend)
				m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg));
		}
	}

	// Generates a call to the handler lambda with the MMIO address as its
	// argument, saving and restoring the registers in use around the call.
	// The result is then moved from EAX to the destination register.
	void CallLambda(int sbits, const std::function<T(u32)>* lambda)
	{
		m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false);
		m_code->ABI_CallLambdaC(lambda, m_address);
		m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false);
		MoveOpArgToReg(sbits, R(EAX));
	}

	Gen::XCodeBlock* m_code;
	u32 m_registers_in_use;
	Gen::X64Reg m_dst_reg;
	u32 m_address;
	bool m_sign_extend;
};

// Generates code that reads the MMIO register at `address` into reg_value.
// The read handler registered in `mmio` for the given access size is visited
// with a code generator of the matching width (8, 16 or 32 bits). Nothing is
// emitted for any other access size.
void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
                                 u32 registers_in_use, u32 address,
                                 int access_size, bool sign_extend)
{
	if (access_size == 8)
	{
		MMIOReadCodeGenerator<u8> visitor(this, registers_in_use, reg_value,
		                                  address, sign_extend);
		mmio->GetHandlerForRead8(address).Visit(visitor);
	}
	else if (access_size == 16)
	{
		MMIOReadCodeGenerator<u16> visitor(this, registers_in_use, reg_value,
		                                   address, sign_extend);
		mmio->GetHandlerForRead16(address).Visit(visitor);
	}
	else if (access_size == 32)
	{
		MMIOReadCodeGenerator<u32> visitor(this, registers_in_use, reg_value,
		                                   address, sign_extend);
		mmio->GetHandlerForRead32(address).Visit(visitor);
	}
}

void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
{
if (!jit->js.memcheck)
Expand Down Expand Up @@ -157,10 +275,25 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
if (opAddress.IsImm())
{
u32 address = (u32)opAddress.offset + offset;

// If we know the address, try the following loading methods in
// order:
//
// 1. If the address is in RAM, generate an unsafe load (directly
// access the RAM buffer and load from there).
// 2. If the address is in the MMIO range, find the appropriate
// MMIO handler and generate the code to load using the handler.
// 3. Otherwise, just generate a call to Memory::Read_* with the
// address hardcoded.
if ((address & mem_mask) == 0)
{
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
}
else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address))
{
MMIOLoadToReg(Memory::mmio_mapping, reg_value, registersInUse,
address, accessSize, signExtend);
}
else
{
ABI_PushRegistersAndAdjustStack(registersInUse, false);
Expand Down
7 changes: 7 additions & 0 deletions Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
Expand Up @@ -8,6 +8,8 @@

#include "Common/x64Emitter.h"

namespace MMIO { class Mapping; }

#define MEMCHECK_START \
FixupBranch memException; \
if (jit->js.memcheck) \
Expand All @@ -28,6 +30,11 @@ class EmuCodeBlock : public Gen::XCodeBlock
// these return the address of the MOV, for backpatching
u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend);

// Generate a load from the MMIO handler for a given address. Only
// call for known addresses in MMIO range (MMIO::IsMMIOAddress).
void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, u32 registers_in_use, u32 address, int access_size, bool sign_extend);

enum SafeLoadStoreFlags
{
SAFE_LOADSTORE_NO_SWAP = 1,
Expand Down

0 comments on commit 1fbfc49

Please sign in to comment.