Skip to content

Commit

Permalink
Merge pull request #5453 from unknownbrackets/jit-minor
Browse files Browse the repository at this point in the history
Store thunk regs on the stack, not a global
  • Loading branch information
hrydgard committed Feb 15, 2014
2 parents 95af98b + d723315 commit 3e6f725
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 35 deletions.
2 changes: 1 addition & 1 deletion Common/ABI.cpp
Expand Up @@ -514,7 +514,7 @@ void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
PUSH(R15);
ABI_AlignStack(0);

// Do this after aligning, beacuse before it's offset by 8.
// Do this after aligning, because before it's offset by 8.
SUB(64, R(RSP), Imm32(XMM_STACK_SPACE));
for (int i = 0; i < 16; ++i)
MOVAPS(MDisp(RSP, i * 16), (X64Reg)(XMM0 + i));
Expand Down
110 changes: 76 additions & 34 deletions Common/Thunk.cpp
Expand Up @@ -24,55 +24,72 @@
namespace
{

#ifndef _M_X64
static u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]);
static u8 GC_ALIGNED32(saved_gpr_state[16 * 8]);
static u16 saved_mxcsr;
#endif

} // namespace

using namespace Gen;

void ThunkManager::Init()
{
#ifdef _M_X64
// Account for the return address.
int stackOffset = 0x8;
int stackPosition;
#endif

AllocCodeSpace(THUNK_ARENA_SIZE);
save_regs = GetCodePtr();
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i));
STMXCSR(M(&saved_mxcsr));
#ifdef _M_X64
MOV(64, M(saved_gpr_state + 0 ), R(RCX));
MOV(64, M(saved_gpr_state + 8 ), R(RDX));
MOV(64, M(saved_gpr_state + 16), R(R8) );
MOV(64, M(saved_gpr_state + 24), R(R9) );
MOV(64, M(saved_gpr_state + 32), R(R10));
MOV(64, M(saved_gpr_state + 40), R(R11));
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS(MDisp(RSP, stackOffset + (i - 2) * 16), (X64Reg)(XMM0 + i));
stackPosition = (ABI_GetNumXMMRegs() - 2) * 2;
STMXCSR(MDisp(RSP, stackOffset + (stackPosition++ * 8)));
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(RCX));
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(RDX));
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(R8) );
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(R9) );
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(R10));
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(R11));
#ifndef _WIN32
MOV(64, M(saved_gpr_state + 48), R(RSI));
MOV(64, M(saved_gpr_state + 56), R(RDI));
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(RSI));
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(RDI));
#endif
MOV(64, M(saved_gpr_state + 64), R(RBX));
MOV(64, MDisp(RSP, stackOffset + (stackPosition++ * 8)), R(RBX));
#else
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i));
STMXCSR(M(&saved_mxcsr));
MOV(32, M(saved_gpr_state + 0 ), R(RCX));
MOV(32, M(saved_gpr_state + 4 ), R(RDX));
#endif
RET();

load_regs = GetCodePtr();
LDMXCSR(M(&saved_mxcsr));
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16));
#ifdef _M_X64
MOV(64, R(RCX), M(saved_gpr_state + 0 ));
MOV(64, R(RDX), M(saved_gpr_state + 8 ));
MOV(64, R(R8) , M(saved_gpr_state + 16));
MOV(64, R(R9) , M(saved_gpr_state + 24));
MOV(64, R(R10), M(saved_gpr_state + 32));
MOV(64, R(R11), M(saved_gpr_state + 40));
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS((X64Reg)(XMM0 + i), MDisp(RSP, stackOffset + (i - 2) * 16));
stackPosition = (ABI_GetNumXMMRegs() - 2) * 2;
LDMXCSR(MDisp(RSP, stackOffset + (stackPosition++ * 8)));
MOV(64, R(RCX), MDisp(RSP, stackOffset + (stackPosition++ * 8)));
MOV(64, R(RDX), MDisp(RSP, stackOffset + (stackPosition++ * 8)));
MOV(64, R(R8) , MDisp(RSP, stackOffset + (stackPosition++ * 8)));
MOV(64, R(R9) , MDisp(RSP, stackOffset + (stackPosition++ * 8)));
MOV(64, R(R10), MDisp(RSP, stackOffset + (stackPosition++ * 8)));
MOV(64, R(R11), MDisp(RSP, stackOffset + (stackPosition++ * 8)));
#ifndef _WIN32
MOV(64, R(RSI), M(saved_gpr_state + 48));
MOV(64, R(RDI), M(saved_gpr_state + 56));
MOV(64, R(RSI), MDisp(RSP, stackOffset + (stackPosition++ * 8)));
MOV(64, R(RDI), MDisp(RSP, stackOffset + (stackPosition++ * 8)));
#endif
MOV(64, R(RBX), M(saved_gpr_state + 64));
MOV(64, R(RBX), MDisp(RSP, stackOffset + (stackPosition++ * 8)));
#else
LDMXCSR(M(&saved_mxcsr));
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16));
MOV(32, R(RCX), M(saved_gpr_state + 0 ));
MOV(32, R(RDX), M(saved_gpr_state + 4 ));
#endif
Expand All @@ -91,6 +108,39 @@ void ThunkManager::Shutdown()
FreeCodeSpace();
}

int ThunkManager::ThunkBytesNeeded()
{
int space = (ABI_GetNumXMMRegs() - 2) * 16;
#ifdef _M_X64
// MXCSR
space += 8;
space += 7 * 8;
#ifndef _WIN32
space += 2 * 8;
#endif
#else
// MXCSR
space += 4;
space += 2 * 4;
#endif

// Round up to the nearest 16 just in case.
return (space + 15) & ~15;
}

int ThunkManager::ThunkStackOffset()
{
#ifdef _M_X64
#ifdef _WIN32
return 0x28;
#else
return 0x8;
#endif
#else
return 0;
#endif
}

const void *ThunkManager::ProtectFunction(const void *function, int num_params)
{
std::map<const void *, const u8 *>::iterator iter;
Expand All @@ -103,19 +153,11 @@ const void *ThunkManager::ProtectFunction(const void *function, int num_params)
const u8 *call_point = GetCodePtr();
// Make sure to align stack.
#ifdef _M_X64
#ifdef _WIN32
SUB(64, R(ESP), Imm8(0x28));
#else
SUB(64, R(ESP), Imm8(0x8));
#endif
SUB(64, R(ESP), Imm32(ThunkStackOffset() + ThunkBytesNeeded()));
ABI_CallFunction(save_regs);
ABI_CallFunction(function);
ABI_CallFunction(load_regs);
#ifdef _WIN32
ADD(64, R(ESP), Imm8(0x28));
#else
ADD(64, R(ESP), Imm8(0x8));
#endif
ADD(64, R(ESP), Imm32(ThunkStackOffset() + ThunkBytesNeeded()));
RET();
#else
CALL((const void *)save_regs);
Expand Down
3 changes: 3 additions & 0 deletions Common/Thunk.h
Expand Up @@ -94,6 +94,9 @@ class ThunkManager : public Gen::XCodeBlock
void Init();
void Shutdown();
void Reset();

int ThunkStackOffset();
int ThunkBytesNeeded();
};

#endif // _THUNK_H_

0 comments on commit 3e6f725

Please sign in to comment.