Fastmem writes for x86-64.
comex committed Sep 25, 2013
1 parent 18abc33 commit 2a339c9
Showing 9 changed files with 107 additions and 85 deletions.
25 changes: 0 additions & 25 deletions Source/Core/Common/Src/x64ABI.cpp
@@ -411,31 +411,6 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
ABI_RestoreStack(0);
}

void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
PUSH(RCX);
PUSH(RDX);
PUSH(RSI);
PUSH(RDI);
PUSH(R8);
PUSH(R9);
PUSH(R10);
PUSH(R11);
PUSH(R11);
}

void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
POP(R11);
POP(R11);
POP(R10);
POP(R9);
POP(R8);
POP(RDI);
POP(RSI);
POP(RDX);
POP(RCX);
}


#ifdef _WIN32
// Win64 Specific Code

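
The two helpers removed above pushed and popped every caller-saved register around a call out of JIT code. Note the duplicated PUSH(R11): nine 8-byte pushes rather than eight, presumably so that RSP stayed 16-byte aligned for any call made while the registers were saved, since the x86-64 ABIs require 16-byte alignment at a CALL and the pushed return address leaves RSP at 8 mod 16 on entry. The snippet below is a standalone sanity check of that arithmetic, not Dolphin code.

// Standalone check of the alignment arithmetic behind the duplicated
// PUSH(R11) above; plain C++, not part of the emitter.
#include <cassert>
#include <cstdint>

int main()
{
    const std::uint64_t caller_rsp = 0x7fff0000;  // 16-byte aligned at the CALL site
    std::uint64_t rsp = caller_rsp - 8;           // CALL pushes the return address
    assert(rsp % 16 == 8);                        // alignment on entry to the callee

    rsp -= 9 * 8;                                 // eight registers plus R11 pushed twice
    assert(rsp % 16 == 0);                        // aligned again for the next CALL

    // Pushing only the eight distinct registers would leave RSP at 8 mod 16,
    // which violates the ABI requirement at any call made from here.
    return 0;
}
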
13 changes: 13 additions & 0 deletions Source/Core/Common/Src/x64Analyzer.cpp
@@ -153,6 +153,19 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
}
}

case 0x88: // mem <- r8
{
info->isMemoryWrite = true;
if (info->operandSize == 4)
{
info->operandSize = 1;
break;
}
else
return false;
break;
}

case 0x89: // mem <- r16/32/64
{
info->isMemoryWrite = true;
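
The new 0x88 case teaches the analyzer to recognize byte-sized MOV stores, so the 8-bit writes now emitted by the fastmem path can be decoded and backpatched when they fault. Below is a standalone sketch of that mapping, reduced to a single function instead of the real DisassembleMov with its prefix handling; the helper name is made up for illustration.

// Simplified illustration of the opcode check added above: map the primary
// opcode of a MOV store to its access size in bytes. Not the real decoder.
#include <cstdio>

// Returns the store size in bytes, or 0 if the opcode is not a MOV store.
static int MovStoreSize(unsigned char opcode, int operandSize)
{
    switch (opcode)
    {
    case 0x88:               // MOV r/m8, r8
        return 1;
    case 0x89:               // MOV r/m16/32/64, r16/32/64
        return operandSize;  // 2, 4 or 8 depending on prefixes
    default:
        return 0;
    }
}

int main()
{
    std::printf("0x88 -> %d byte(s)\n", MovStoreSize(0x88, 4));  // 1
    std::printf("0x89 -> %d byte(s)\n", MovStoreSize(0x89, 4));  // 4
    return 0;
}
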
6 changes: 0 additions & 6 deletions Source/Core/Common/Src/x64Emitter.h
@@ -646,12 +646,6 @@ class XEmitter
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
void ABI_PopAllCalleeSavedRegsAndAdjustStack();

// A function that doesn't know anything about it's surroundings, should
// be surrounded by these to establish a safe environment, where it can roam free.
// An example is a backpatch injected function.
void ABI_PushAllCallerSavedRegsAndAdjustStack();
void ABI_PopAllCallerSavedRegsAndAdjustStack();

unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false);
void ABI_AlignStack(unsigned int frameSize, bool noProlog = false);
void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false);
2 changes: 0 additions & 2 deletions Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -209,8 +209,6 @@ void Jit64::stfd(UGeckoInstruction inst)
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);

MOVAPD(XMM0, fpr.R(s));
MOVD_xmm(R(EAX), XMM0);
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);

4 changes: 0 additions & 4 deletions Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp
@@ -1322,9 +1322,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
int addr_scale = SCALE_8;
#endif
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
Jit->ABI_AlignStack(0);
Jit->CALLptr(MScaled(EDX, addr_scale, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized)));
Jit->ABI_RestoreStack(0);
Jit->MOVAPD(reg, R(XMM0));
RI.fregs[reg] = I;
regNormalRegClear(RI, I);
@@ -1429,9 +1427,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
#endif
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
Jit->ABI_AlignStack(0);
Jit->CALLptr(MScaled(EDX, addr_scale, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized)));
Jit->ABI_RestoreStack(0);
if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8)
22 changes: 11 additions & 11 deletions Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp
@@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, false, true);
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);

RET();

@@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);

SafeWriteRegToReg(AX, ECX, 16, 0, false, true);
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);

RET();

@@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOV(16, R(AX), M((char*)psTemp + 4));

BSWAP(32, EAX);
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);

RET();

@@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOVD_xmm(R(EAX), XMM0);
BSWAP(32, EAX);
ROL(32, R(EAX), Imm8(16));
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);

RET();

@@ -295,19 +295,19 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {

// Easy!
const u8* storeSingleFloat = AlignCode4();
SafeWriteFloatToReg(XMM0, ECX);
SafeWriteFloatToReg(XMM0, ECX, SAFE_WRITE_NO_FASTMEM);
RET();
/*
if (cpu_info.bSSSE3) {
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
// TODO: SafeWriteFloat
MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
} else {
MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, true, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
}*/

const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
@@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_255));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, true, true);
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();

const u8* storeSingleS8 = AlignCode4();
@@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m128));
MINSS(XMM0, M((void *)&m_127));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, true, true);
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();

const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
@@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_65535));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true);
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();

const u8* storeSingleS16 = AlignCode4();
@@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m32768));
MINSS(XMM0, M((void *)&m_32767));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true);
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();

singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
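
Every call site above trades the old positional bool pair (swap, noProlog) for a named flag mask, which also gains a bit for opting out of the new fastmem fast path; these shared quantizer routines keep going through the checked slow path. The SAFE_WRITE_* constants are declared in a header that is not among the hunks shown here, so the values in the sketch below are assumptions used only to illustrate the pattern.

// Minimal sketch of the bools-to-flag-mask change visible above. The real
// SAFE_WRITE_* values live elsewhere in the tree; these are placeholders.
#include <cstdio>

enum
{
    SAFE_WRITE_NO_SWAP    = 1,  // do not byte-swap the value before storing
    SAFE_WRITE_NO_PROLOG  = 2,  // caller already set up the ABI frame
    SAFE_WRITE_NO_FASTMEM = 4,  // always take the checked slow path
};

static void DescribeWrite(int flags)
{
    std::printf("swap=%d prolog=%d fastmem=%d\n",
                !(flags & SAFE_WRITE_NO_SWAP),
                !(flags & SAFE_WRITE_NO_PROLOG),
                !(flags & SAFE_WRITE_NO_FASTMEM));
}

int main()
{
    // Old call: SafeWriteRegToReg(AX, ECX, 16, 0, false, true)
    // New call: same intent, but self-describing at the call site.
    DescribeWrite(SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
    return 0;
}
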
83 changes: 51 additions & 32 deletions Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp
@@ -67,7 +67,10 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
X64Reg dataReg = (X64Reg)info.regOperandReg;

// It's a read. Easy.
ABI_PushAllCallerSavedRegsAndAdjustStack();
// It ought to be necessary to align the stack here. Since it seems to not
// affect anybody, I'm not going to add it just to be completely safe about
// performance.

if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
if (info.displacement) {
@@ -87,8 +90,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
break;
}

ABI_PopAllCallerSavedRegsAndAdjustStack();

if (dataReg != EAX)
{
MOV(32, R(dataReg), R(EAX));
@@ -109,46 +110,50 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)

#ifdef _M_X64
X64Reg dataReg = (X64Reg)info.regOperandReg;
if (dataReg != EAX)
PanicAlert("Backpatch write - not through EAX");

X64Reg addrReg = (X64Reg)info.scaledReg;

// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
// hardware access - we can take shortcuts.
//if (emAddress == 0xCC008000)
// PanicAlert("Caught a FIFO write");
CMP(32, R(addrReg), Imm32(0xCC008000));
FixupBranch skip_fast = J_CC(CC_NE, false);
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
CALL((void*)jit->GetAsmRoutines()->fifoDirectWrite32);
RET();
SetJumpTarget(skip_fast);
ABI_PushAllCallerSavedRegsAndAdjustStack();
// Don't treat FIFO writes specially for now because they require a burst
// check anyway.

if (dataReg == ABI_PARAM2)
PanicAlert("Incorrect use of SafeWriteRegToReg");
if (addrReg != ABI_PARAM1)
{
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg));
MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg));
}
else
{
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg));
MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg));
}

if (info.displacement)
{
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
}

SUB(64, R(RSP), Imm8(8));

switch (info.operandSize)
{
case 8:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U64, 2));
break;
case 4:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2));
break;
case 2:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U16, 2));
break;
case 1:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U8, 2));
break;
}
ABI_PopAllCallerSavedRegsAndAdjustStack();

ADD(64, R(RSP), Imm8(8));
RET();
#endif

@@ -193,21 +198,35 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
}
else
{
PanicAlert("BackPatch : Currently only supporting reads."
"\n\nAttempted to write to %08x.", emAddress);

// TODO: special case FIFO writes. Also, support 32-bit mode.
// Also, debug this so that it actually works correctly :P
XEmitter emitter(codePtr - 2);
// We know it's EAX so the BSWAP before will be two byte. Overwrite it.
// We entered here with a BSWAP-ed register. We'll have to swap it back.
u64 *ptr = ContextRN(ctx, info.regOperandReg);
int bswapSize = 0;
switch (info.operandSize)
{
case 1:
bswapSize = 0;
break;
case 2:
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap16((u16) *ptr);
break;
case 4:
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap32((u32) *ptr);
break;
case 8:
bswapSize = 3;
*ptr = Common::swap64(*ptr);
break;
}

u8 *start = codePtr - bswapSize;
XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info);
emitter.CALL((void *)trampoline);
emitter.NOP((int)info.instructionSize - 3);
if (info.instructionSize < 3)
PanicAlert("Instruction too small");
// We entered here with a BSWAP-ed EAX. We'll have to swap it back.
ctx->CTX_RAX = Common::swap32((u32)ctx->CTX_RAX);
return codePtr - 2;
emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr());
return start;
}
return 0;
#else
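
The rewritten write case above no longer requires the store to go through EAX: it looks up the operand register in the saved exception context, undoes the byte swap the fast path had already applied, and starts the patch early enough to overwrite the swap instruction that precedes the MOV (bswapSize looks like the encoded length of that instruction, one byte longer when a REX prefix is needed for R8-R15) with a CALL to the write trampoline. Below is a standalone sketch of the swap-back step only; swap16/32/64 stand in for Common::swap16/32/64 and are written with GCC/Clang builtins rather than Dolphin's own implementation.

// Sketch of the swap-back performed on the saved register before the write
// trampoline runs, keyed on the operand size recovered by the analyzer.
#include <cstdint>
#include <cstdio>

static std::uint16_t swap16(std::uint16_t v) { return __builtin_bswap16(v); }
static std::uint32_t swap32(std::uint32_t v) { return __builtin_bswap32(v); }
static std::uint64_t swap64(std::uint64_t v) { return __builtin_bswap64(v); }

static void UnswapRegister(std::uint64_t* reg, int operandSize)
{
    switch (operandSize)
    {
    case 1:  break;                                             // bytes have no endianness
    case 2:  *reg = swap16(static_cast<std::uint16_t>(*reg)); break;
    case 4:  *reg = swap32(static_cast<std::uint32_t>(*reg)); break;
    case 8:  *reg = swap64(*reg); break;
    }
}

int main()
{
    std::uint64_t rax = 0x78563412;  // 0x12345678 after the JIT's byte swap
    UnswapRegister(&rax, 4);
    std::printf("%#llx\n", static_cast<unsigned long long>(rax));  // prints 0x12345678
    return 0;
}
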
27 changes: 24 additions & 3 deletions Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
@@ -223,8 +223,27 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac
}

// Destroys both arg registers
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap, bool noProlog)
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, int flags)
{
#if defined(_M_X64)
if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem &&
!(flags & (SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_FASTMEM))
#ifdef ENABLE_MEM_CHECK
&& !Core::g_CoreStartupParameter.bEnableDebugging
#endif
)
{
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
if (accessSize == 8)
{
NOP(1);
NOP(1);
}
return;
}
#endif

if (offset)
ADD(32, R(reg_addr), Imm32((u32)offset));

@@ -245,6 +264,8 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
TEST(32, R(reg_addr), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z);
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
bool noProlog = flags & SAFE_WRITE_NO_PROLOG;
bool swap = !(flags & SAFE_WRITE_NO_SWAP);
switch (accessSize)
{
case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break;
Expand All @@ -257,7 +278,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
SetJumpTarget(exit);
}

void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int flags)
{
if (false && cpu_info.bSSSE3) {
// This path should be faster but for some reason it causes errors so I've disabled it.
@@ -290,7 +311,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
} else {
MOVSS(M(&float_buffer), xmm_value);
MOV(32, R(EAX), M(&float_buffer));
SafeWriteRegToReg(EAX, reg_addr, 32, 0, true);
SafeWriteRegToReg(EAX, reg_addr, 32, 0, flags);
}
}

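
With MMU emulation off, memory checks off and neither NO_SWAP nor NO_FASTMEM set, SafeWriteRegToReg now emits the raw byte-swapped store directly and leaves MMIO addresses to the fault handler and trampolines above. The two NOPs after 8-bit stores appear to be padding: the backpatcher overwrites the faulting site with a 5-byte CALL rel32, and a byte store through the fastmem base register can encode in as few as 3 bytes, so without padding the CALL would spill into the next instruction. That reading, including the 3-byte figure, is an inference from the code rather than something stated in the diff; the sketch below only spells out the arithmetic.

// Padding arithmetic behind the two NOPs emitted after 8-bit fastmem stores,
// under the assumption described above. Not emitter code.
#include <algorithm>
#include <cstdio>

static int PatchPadding(int storeLen)
{
    const int kCallRel32Len = 5;  // E8 xx xx xx xx
    return std::max(0, kCallRel32Len - storeLen);
}

int main()
{
    std::printf("3-byte store -> %d NOP byte(s)\n", PatchPadding(3));  // 2
    std::printf("5-byte store -> %d NOP byte(s)\n", PatchPadding(5));  // 0
    return 0;
}
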
