Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #1024 from comex/abi-cleanup
ABI cleanup
  • Loading branch information
comex committed Sep 8, 2014
2 parents 262fa1d + 4dc0906 commit 7fb6628
Show file tree
Hide file tree
Showing 12 changed files with 104 additions and 204 deletions.
199 changes: 51 additions & 148 deletions Source/Core/Common/x64ABI.cpp

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Source/Core/Common/x64ABI.h
Expand Up @@ -53,5 +53,7 @@

#endif // WIN32

#define ABI_ALL_CALLEE_SAVED ((u32) ~ABI_ALL_CALLER_SAVED)

#define ABI_RETURN RAX

21 changes: 8 additions & 13 deletions Source/Core/Common/x64Emitter.h
Expand Up @@ -281,6 +281,8 @@ class XEmitter
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);

void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);

protected:
inline void Write8(u8 value) {*code++ = value;}
inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
Expand Down Expand Up @@ -751,23 +753,16 @@ class XEmitter

// Pass a register as a parameter.
void ABI_CallFunctionR(void *func, X64Reg reg1);
void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog = false);
void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2);

// Helper method for the above, or can be used separately.
void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, Gen::X64Reg temp);

// A function that doesn't have any control over what it will do to regs,
// such as the dispatcher, should be surrounded by these.
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
void ABI_PopAllCalleeSavedRegsAndAdjustStack();

// A more flexible version of the above.
void ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog);
void ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog);

unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false);
void ABI_AlignStack(unsigned int frameSize, bool noProlog = false);
void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false);
// Saves/restores the registers and adjusts the stack to be aligned as
// required by the ABI, where the previous alignment was as specified.
// Push returns the size of the shadow space, i.e. the offset of the frame.
size_t ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
void ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);

inline int ABI_GetNumXMMRegs() { return 16; }

Expand Down
6 changes: 4 additions & 2 deletions Source/Core/Core/DSP/DSPEmitter.cpp
Expand Up @@ -384,7 +384,9 @@ const u8 *DSPEmitter::CompileStub()
void DSPEmitter::CompileDispatcher()
{
enterDispatcher = AlignCode16();
ABI_PushAllCalleeSavedRegsAndAdjustStack();
// We don't use floating point (high 16 bits).
u32 registers_used = ABI_ALL_CALLEE_SAVED & 0xffff;
ABI_PushRegistersAndAdjustStack(registers_used, 8);

const u8 *dispatcherLoop = GetCodePtr();

Expand Down Expand Up @@ -419,6 +421,6 @@ void DSPEmitter::CompileDispatcher()
SetJumpTarget(exceptionExit);
}
//MOV(32, M(&cyclesLeft), Imm32(0));
ABI_PopAllCalleeSavedRegsAndAdjustStack();
ABI_PopRegistersAndAdjustStack(registers_used, 8);
RET();
}
4 changes: 2 additions & 2 deletions Source/Core/Core/PowerPC/Jit64/Jit.cpp
Expand Up @@ -495,9 +495,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.fifoBytesThisBlock -= 32;
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
}

u32 function = HLE::GetFunctionIndex(ops[i].address);
Expand Down
6 changes: 3 additions & 3 deletions Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
Expand Up @@ -16,7 +16,7 @@ using namespace Gen;
void Jit64AsmRoutineManager::Generate()
{
enterCode = AlignCode16();
ABI_PushAllCalleeSavedRegsAndAdjustStack();
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);

// Two statically allocated registers.
MOV(64, R(RMEM), Imm64((u64)Memory::base));
Expand All @@ -39,7 +39,7 @@ void Jit64AsmRoutineManager::Generate()
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();
SetJumpTarget(noBreakpoint);
SetJumpTarget(notStepping);
Expand Down Expand Up @@ -126,7 +126,7 @@ void Jit64AsmRoutineManager::Generate()
J_CC(CC_Z, outerLoop);

//Landing pad for drec space
ABI_PopAllCalleeSavedRegsAndAdjustStack();
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();

GenerateCommon();
Expand Down
12 changes: 6 additions & 6 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
Expand Up @@ -116,11 +116,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
FixupBranch noIdle = J_CC(CC_NZ);

u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);

ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);

ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);

// ! we must continue executing the loop after exception handling; maybe there is still 0 in r0
//MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
Expand Down Expand Up @@ -285,9 +285,9 @@ void Jit64::dcbz(UGeckoInstruction inst)
// supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure.
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);

FixupBranch exit = J();
SetJumpTarget(fast);
Expand Down Expand Up @@ -374,7 +374,7 @@ void Jit64::stX(UGeckoInstruction inst)
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));

u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 32:
Expand All @@ -387,7 +387,7 @@ void Jit64::stX(UGeckoInstruction inst)
ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (update)
gpr.SetImmediate32(a, addr);
return;
Expand Down
2 changes: 0 additions & 2 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
Expand Up @@ -87,9 +87,7 @@ void Jit64::psq_l(UGeckoInstruction inst)
if (inst.W)
OR(32, R(RSCRATCH2), Imm8(8));

ABI_AlignStack(0);
CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
ABI_RestoreStack(0);

// MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access

Expand Down
13 changes: 7 additions & 6 deletions Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp
Expand Up @@ -110,9 +110,9 @@ void CommonAsmRoutines::GenFrsqrte()
SetJumpTarget(complex1);
SetJumpTarget(complex2);
SetJumpTarget(complex3);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunction((void *)&MathUtil::ApproximateReciprocalSquareRoot);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
RET();
}

Expand Down Expand Up @@ -169,9 +169,9 @@ void CommonAsmRoutines::GenFres()

SetJumpTarget(complex1);
SetJumpTarget(complex2);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunction((void *)&MathUtil::ApproximateReciprocal);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
RET();
}

Expand Down Expand Up @@ -258,9 +258,10 @@ void CommonAsmRoutines::GenQuantizedStores()
SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH);
FixupBranch skip_complex = J(true);
SetJumpTarget(too_complex);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
// RSP alignment here is 8 due to the call.
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
SetJumpTarget(skip_complex);
RET();

Expand Down
12 changes: 5 additions & 7 deletions Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
Expand Up @@ -56,10 +56,8 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
X64Reg dataReg = (X64Reg)info.regOperandReg;

// It's a read. Easy.
// It ought to be necessary to align the stack here. Since it seems to not
// affect anybody, I'm not going to add it just to be completely safe about
// performance.
ABI_PushRegistersAndAdjustStack(registersInUse, true);
// RSP alignment here is 8 due to the call.
ABI_PushRegistersAndAdjustStack(registersInUse, 8);

if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
Expand Down Expand Up @@ -91,7 +89,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
MOV(32, R(dataReg), R(ABI_RETURN));
}

ABI_PopRegistersAndAdjustStack(registersInUse, true);
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
RET();
return trampoline;
}
Expand All @@ -115,7 +113,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(pc));

ABI_PushRegistersAndAdjustStack(registersInUse, true);
ABI_PushRegistersAndAdjustStack(registersInUse, 8);

MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3);

Expand All @@ -140,7 +138,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
break;
}

ABI_PopRegistersAndAdjustStack(registersInUse, true);
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
RET();

return trampoline;
Expand Down
26 changes: 13 additions & 13 deletions Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
Expand Up @@ -204,9 +204,9 @@ class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>

void CallLambda(int sbits, const std::function<T(u32)>* lambda)
{
m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false);
m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, 0);
m_code->ABI_CallLambdaC(lambda, m_address);
m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false);
m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, 0);
MoveOpArgToReg(sbits, R(ABI_RETURN));
}

Expand Down Expand Up @@ -305,15 +305,15 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
}
else
{
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 64: ABI_CallFunctionC((void *)&Memory::Read_U64, address); break;
case 32: ABI_CallFunctionC((void *)&Memory::Read_U32, address); break;
case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break;
case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);

MEMCHECK_START

Expand Down Expand Up @@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,

FixupBranch fast = J_CC(CC_Z, true);

ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 64:
Expand All @@ -366,7 +366,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);

MEMCHECK_START

Expand Down Expand Up @@ -470,25 +470,25 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
FixupBranch fast = J_CC(CC_Z, true);
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG));
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
switch (accessSize)
{
case 64:
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false);
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr);
break;
case 32:
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false);
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr);
break;
case 16:
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false);
ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr);
break;
case 8:
ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false);
ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, noProlog);
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
Expand Down
5 changes: 3 additions & 2 deletions Source/Core/VideoCommon/VertexLoader.cpp
Expand Up @@ -584,7 +584,8 @@ void VertexLoader::CompileVertexTranslator()
PanicAlert("Trying to recompile a vertex translator");

m_compiledCode = GetCodePtr();
ABI_PushAllCalleeSavedRegsAndAdjustStack();
// We don't use any callee saved registers or anything but RAX.
ABI_PushRegistersAndAdjustStack(0, 8);

// Start loop here
const u8 *loop_start = GetCodePtr();
Expand Down Expand Up @@ -845,7 +846,7 @@ void VertexLoader::CompileVertexTranslator()
SUB(32, MatR(RAX), Imm8(1));

J_CC(CC_NZ, loop_start);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
ABI_PopRegistersAndAdjustStack(0, 8);
RET();
#endif
}
Expand Down

0 comments on commit 7fb6628

Please sign in to comment.