Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fix stack misalignment fix.
  • Loading branch information
comex committed Sep 22, 2013
1 parent 9a6f28f commit 6209067
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 186 deletions.
245 changes: 104 additions & 141 deletions Source/Core/Common/Src/x64ABI.cpp

Large diffs are not rendered by default.

13 changes: 4 additions & 9 deletions Source/Core/Common/Src/x64Emitter.h
Expand Up @@ -639,7 +639,7 @@ class XEmitter

// Pass a register as a parameter.
void ABI_CallFunctionR(void *func, Gen::X64Reg reg1);
void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2);
void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2, bool noProlog = false);

// A function that doesn't have any control over what it will do to regs,
// such as the dispatcher, should be surrounded by these.
Expand All @@ -652,14 +652,9 @@ class XEmitter
void ABI_PushAllCallerSavedRegsAndAdjustStack();
void ABI_PopAllCallerSavedRegsAndAdjustStack();

unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize);
void ABI_AlignStack(unsigned int frameSize);
void ABI_RestoreStack(unsigned int frameSize);

// Sets up a __cdecl function.
// Only x64 really needs the parameter count.
void ABI_EmitPrologue(int maxCallParams);
void ABI_EmitEpilogue(int maxCallParams);
unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false);
void ABI_AlignStack(unsigned int frameSize, bool noProlog = false);
void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false);

#ifdef _M_IX86
// Number of XMM registers reported when building for 32-bit x86 (_M_IX86).
inline int ABI_GetNumXMMRegs()
{
	const int xmmRegCount = 8;
	return xmmRegCount;
}
Expand Down
20 changes: 6 additions & 14 deletions Source/Core/Common/Src/x64Thunk.cpp
Expand Up @@ -91,35 +91,27 @@ void *ThunkManager::ProtectFunction(void *function, int num_params)
PanicAlert("Trying to protect functions before the emu is started. Bad bad bad.");

const u8 *call_point = GetCodePtr();
// Make sure to align stack.
#ifdef _M_X64
#ifdef _WIN32
SUB(64, R(ESP), Imm8(0x28));
#else
SUB(64, R(ESP), Imm8(0x8));
#endif
// Make sure to align stack.
ABI_AlignStack(0, true);
CALL((void*)save_regs);
CALL((void*)function);
CALL((void*)load_regs);
#ifdef _WIN32
ADD(64, R(ESP), Imm8(0x28));
#else
ADD(64, R(ESP), Imm8(0x8));
#endif
ABI_RestoreStack(0, true);
RET();
#else
CALL((void*)save_regs);
// Since parameters are in the previous stack frame, not in registers, this takes some
// trickery: we simply re-push the parameters. This might not be optimal, but that doesn't
// really matter.
ABI_AlignStack(num_params * 4);
ABI_AlignStack(num_params * 4, true);
unsigned int alignedSize = ABI_GetAlignedFrameSize(num_params * 4);
for (int i = 0; i < num_params; i++) {
// ESP is changing, so we do not need i
PUSH(32, MDisp(ESP, alignedSize - 4));
PUSH(32, MDisp(ESP, alignedSize));
}
CALL(function);
ABI_RestoreStack(num_params * 4);
ABI_RestoreStack(num_params * 4, true);
CALL((void*)load_regs);
RET();
#endif
Expand Down
26 changes: 13 additions & 13 deletions Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp
Expand Up @@ -167,7 +167,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX));
FixupBranch skip_complex = J();
SetJumpTarget(too_complex);
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX);
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX, /* noProlog = */ true);
SetJumpTarget(skip_complex);
RET();
#else
Expand All @@ -184,10 +184,10 @@ void CommonAsmRoutines::GenQuantizedStores() {
FixupBranch arg2 = J();
SetJumpTarget(argh);
MOV(32, R(EAX), M(((char*)&psTemp)));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true);
MOV(32, R(EAX), M(((char*)&psTemp)+4));
ADD(32, R(ECX), Imm32(4));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true);
SetJumpTarget(arg2);
RET();
#endif
Expand All @@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, false);
SafeWriteRegToReg(AX, ECX, 16, 0, false, true);

RET();

Expand All @@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);

SafeWriteRegToReg(AX, ECX, 16, 0, false);
SafeWriteRegToReg(AX, ECX, 16, 0, false, true);

RET();

Expand All @@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOV(16, R(AX), M((char*)psTemp + 4));

BSWAP(32, EAX);
SafeWriteRegToReg(EAX, ECX, 32, 0, false);
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);

RET();

Expand All @@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOVD_xmm(R(EAX), XMM0);
BSWAP(32, EAX);
ROL(32, R(EAX), Imm8(16));
SafeWriteRegToReg(EAX, ECX, 32, 0, false);
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);

RET();

Expand Down Expand Up @@ -303,11 +303,11 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
// TODO: SafeWriteFloat
MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, false);
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
} else {
MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, true);
SafeWriteRegToReg(EAX, ECX, 32, 0, true, true);
}*/

const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
Expand All @@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_255));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, true);
SafeWriteRegToReg(AL, ECX, 8, 0, true, true);
RET();

const u8* storeSingleS8 = AlignCode4();
Expand All @@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m128));
MINSS(XMM0, M((void *)&m_127));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, true);
SafeWriteRegToReg(AL, ECX, 8, 0, true, true);
RET();

const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
Expand All @@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_65535));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, true);
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true);
RET();

const u8* storeSingleS16 = AlignCode4();
Expand All @@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m32768));
MINSS(XMM0, M((void *)&m_32767));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, true);
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true);
RET();

singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
Expand Down
8 changes: 4 additions & 4 deletions Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
Expand Up @@ -223,7 +223,7 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac
}

// Destroys both arg registers
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap, bool noProlog)
{
if (offset)
ADD(32, R(reg_addr), Imm32((u32)offset));
Expand All @@ -247,9 +247,9 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
switch (accessSize)
{
case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr); break;
case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr); break;
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break;
case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break;
case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr, noProlog); break;
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr, noProlog); break;
}
FixupBranch exit = J();
SetJumpTarget(fast);
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
Expand Up @@ -16,7 +16,7 @@ class EmuCodeBlock : public Gen::XCodeBlock {
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true, bool noProlog = false);

// Trashes both inputs and EAX.
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr);
Expand Down
5 changes: 3 additions & 2 deletions Source/Core/VideoCommon/Src/VertexLoader.cpp
Expand Up @@ -217,7 +217,7 @@ void VertexLoader::CompileVertexTranslator()
PanicAlert("Trying to recompile a vertex translator");

m_compiledCode = GetCodePtr();
ABI_EmitPrologue(4);
ABI_PushAllCalleeSavedRegsAndAdjustStack();

// Start loop here
const u8 *loop_start = GetCodePtr();
Expand Down Expand Up @@ -499,7 +499,8 @@ void VertexLoader::CompileVertexTranslator()
#endif

J_CC(CC_NZ, loop_start, true);
ABI_EmitEpilogue(4);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
#endif
m_NativeFmt->Initialize(vtx_decl);
}
Expand Down
5 changes: 3 additions & 2 deletions Source/Core/VideoCommon/Src/x64DLCache.cpp
Expand Up @@ -409,7 +409,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)

emitter.AlignCode4();
dl->compiled_code = emitter.GetCodePtr();
emitter.ABI_EmitPrologue(4);
emitter.ABI_PushAllCalleeSavedRegsAndAdjustStack();

while (g_pVideoData < end)
{
Expand Down Expand Up @@ -572,7 +572,8 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
break;
}
}
emitter.ABI_EmitEpilogue(4);
emitter.ABI_PopAllCalleeSavedRegsAndAdjustStack();
emitter.RET();
INCSTAT(stats.numDListsCalled);
INCSTAT(stats.thisFrame.numDListsCalled);
Statistics::SwapDL();
Expand Down

0 comments on commit 6209067

Please sign in to comment.