Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Save only the registers that need to be saved rather than going through ProtectFunction.
  • Loading branch information
comex committed Sep 25, 2013
1 parent 2a339c9 commit ebe4448
Show file tree
Hide file tree
Showing 14 changed files with 193 additions and 58 deletions.
68 changes: 68 additions & 0 deletions Source/Core/Common/Src/x64Emitter.cpp
Expand Up @@ -1634,6 +1634,74 @@ void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u
CALLptr(M(impptr));
}

// Emits code that saves the registers selected by `mask` on the stack.
//
// mask layout:
//   bits  0..15 - general-purpose register r is saved with PUSH
//   bits 16..31 - XMM register (bit - 16) is saved with MOVAPD into a
//                 16-byte slot below the pushed registers
//
// After the pushes, RSP is lowered by: 8 bytes of padding when an even number
// of registers was pushed, 16 bytes per selected XMM register, and (on _WIN32)
// 0x20 bytes that sit below the XMM slots.
// NOTE(review): the padding rule presumably assumes RSP is 8 bytes off 16-byte
// alignment on entry, so that the MOVAPD slots end up aligned - confirm
// against the callers.
//
// The emitted code must be undone by PopRegistersAndAlignStack with the same
// mask.
void XEmitter::PushRegistersAndAlignStack(u32 mask)
{
#ifdef _WIN32
    // Presumably the Win64 shadow space for a call made while registers are saved.
    const int shadow = 0x20;
#else
    const int shadow = 0;
#endif

    // Save the general-purpose registers, lowest index first.
    int pushed = 0;
    for (int r = 0; r < 16; r++)
    {
        if (mask & (1u << r))
        {
            PUSH(static_cast<X64Reg>(r));
            pushed++;
        }
    }

    // An even number of 8-byte pushes needs 8 more bytes of padding.
    int size = (pushed & 1) ? 0 : 8;
    for (int x = 0; x < 16; x++)
    {
        if (mask & (1u << (16 + x)))
            size += 16;
    }
    size += shadow;

    // An 8-bit immediate is sign-extended by the CPU, so it can only encode
    // values up to 0x7F; anything from 0x80 upwards must use the 32-bit form
    // or the adjustment would move RSP in the wrong direction.
    if (size)
        SUB(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));

    // Store the XMM registers above the shadow area, lowest index first.
    int offset = shadow;
    for (int x = 0; x < 16; x++)
    {
        if (mask & (1u << (16 + x)))
        {
            MOVAPD(MDisp(RSP, offset), static_cast<X64Reg>(x));
            offset += 16;
        }
    }
}

// Emits code that restores the registers selected by `mask`; the counterpart
// of PushRegistersAndAlignStack, and it must be given the same mask.
//
// mask layout:
//   bits  0..15 - general-purpose register r is restored with POP
//   bits 16..31 - XMM register (bit - 16) is reloaded with MOVAPD from its
//                 16-byte stack slot
//
// XMM registers are reloaded first (their slots start above the 0x20-byte
// area reserved on _WIN32), then RSP is raised past the XMM slots, the
// reserved area and the 8-byte padding used for an even push count, and
// finally the general-purpose registers are popped in descending index order.
void XEmitter::PopRegistersAndAlignStack(u32 mask)
{
    int size = 0;
#ifdef _WIN32
    // Skip the area reserved below the XMM slots on Windows.
    size += 0x20;
#endif

    // Reload the XMM registers, lowest index first; `size` doubles as the
    // running slot offset.
    for (int x = 0; x < 16; x++)
    {
        if (mask & (1u << (16 + x)))
        {
            MOVAPD(static_cast<X64Reg>(x), MDisp(RSP, size));
            size += 16;
        }
    }

    // Recompute the padding that was added for an even number of pushes.
    int pushed = 0;
    for (int r = 0; r < 16; r++)
    {
        if (mask & (1u << r))
            pushed++;
    }
    size += (pushed & 1) ? 0 : 8;

    // An 8-bit immediate is sign-extended by the CPU, so it can only encode
    // values up to 0x7F; anything from 0x80 upwards must use the 32-bit form
    // or the adjustment would move RSP in the wrong direction.
    if (size)
        ADD(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));

    // Pop in the reverse of the push order.
    for (int r = 15; r >= 0; r--)
    {
        if (mask & (1u << r))
            POP(static_cast<X64Reg>(r));
    }
}

#endif

}
3 changes: 3 additions & 0 deletions Source/Core/Common/Src/x64Emitter.h
Expand Up @@ -691,6 +691,9 @@ class XEmitter

#define DECLARE_IMPORT(x) extern "C" void *__imp_##x

void PushRegistersAndAlignStack(u32 mask);
void PopRegistersAndAlignStack(u32 mask);

#endif
}; // class XEmitter

Expand Down
18 changes: 18 additions & 0 deletions Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
Expand Up @@ -737,3 +737,21 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc

return normalEntry;
}

// Builds a bitmask of the host registers that the register caches do not
// report as free: bit i is set when gpr.IsFreeX(i) is false, and bit 16 + i
// is set when fpr.IsFreeX(i) is false. Without _M_X64 the result is always 0.
u32 Jit64::RegistersInUse()
{
#ifdef _M_X64
    u32 inUse = 0;
    for (int reg = 0; reg < NUMXREGS; ++reg)
    {
        const u32 gprBit = gpr.IsFreeX(reg) ? 0 : (1 << reg);
        const u32 fprBit = fpr.IsFreeX(reg) ? 0 : (1 << (16 + reg));
        inUse |= gprBit | fprBit;
    }
    return inUse;
#else
    // The mask is not needed on this target.
    return 0;
#endif
}
2 changes: 2 additions & 0 deletions Source/Core/Core/Src/PowerPC/Jit64/Jit.h
Expand Up @@ -72,6 +72,8 @@ class Jit64 : public Jitx86Base
void Jit(u32 em_address);
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);

u32 RegistersInUse();

JitBlockCache *GetBlockCache() { return &blocks; }

void Trace();
Expand Down
5 changes: 0 additions & 5 deletions Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp
Expand Up @@ -76,11 +76,6 @@ void RegCache::LockX(int x1, int x2, int x3, int x4)
if (x4 != 0xFF) xlocks[x4] = true;
}

// Returns true only when host register `xreg` is marked free and is not
// currently locked.
bool RegCache::IsFreeX(int xreg) const
{
    if (xlocks[xreg])
        return false;
    return xregs[xreg].free;
}

void RegCache::UnlockAll()
{
for (int i = 0; i < 32; i++)
Expand Down
6 changes: 5 additions & 1 deletion Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
Expand Up @@ -106,7 +106,11 @@ class RegCache
void UnlockAll();
void UnlockAllX();

bool IsFreeX(int xreg) const;
// Returns true only when host register `xreg` is marked free and is not
// currently locked.
bool IsFreeX(int xreg) const
{
    const bool locked = xlocks[xreg];
    return !locked && xregs[xreg].free;
}


X64Reg GetFreeXReg();

Expand Down
8 changes: 4 additions & 4 deletions Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
Expand Up @@ -121,7 +121,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
// do our job at first
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.Lock(d);
SafeLoadToEAX(gpr.R(a), accessSize, offset, signExtend);
SafeLoadToEAX(gpr.R(a), accessSize, offset, RegistersInUse(), signExtend);
gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
Expand Down Expand Up @@ -193,7 +193,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
}
}

SafeLoadToEAX(opAddress, accessSize, 0, signExtend);
SafeLoadToEAX(opAddress, accessSize, 0, RegistersInUse(), signExtend);

// We must flush immediate values from the following registers because
// they may change at runtime if no MMU exception has been raised
Expand Down Expand Up @@ -373,7 +373,7 @@ void Jit64::stX(UGeckoInstruction inst)
gpr.Lock(s, a);
MOV(32, R(EDX), gpr.R(a));
MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, offset);
SafeWriteRegToReg(ECX, EDX, accessSize, offset, RegistersInUse());

if (update && offset)
{
Expand Down Expand Up @@ -429,7 +429,7 @@ void Jit64::stXx(UGeckoInstruction inst)
}

MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
SafeWriteRegToReg(ECX, EDX, accessSize, 0, RegistersInUse());

gpr.UnlockAll();
gpr.UnlockAllX();
Expand Down
12 changes: 6 additions & 6 deletions Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
Expand Up @@ -50,7 +50,7 @@ void Jit64::lfs(UGeckoInstruction inst)
}
s32 offset = (s32)(s16)inst.SIMM_16;

SafeLoadToEAX(gpr.R(a), 32, offset, false);
SafeLoadToEAX(gpr.R(a), 32, offset, RegistersInUse(), false);

MEMCHECK_START

Expand Down Expand Up @@ -207,10 +207,10 @@ void Jit64::stfd(UGeckoInstruction inst)
MOVAPD(XMM0, fpr.R(s));
PSRLQ(XMM0, 32);
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));

LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());

SetJumpTarget(exit);

Expand Down Expand Up @@ -282,7 +282,7 @@ void Jit64::stfs(UGeckoInstruction inst)
MEMCHECK_END
}
CVTSD2SS(XMM0, fpr.R(s));
SafeWriteFloatToReg(XMM0, ABI_PARAM2);
SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
Expand All @@ -302,7 +302,7 @@ void Jit64::stfsx(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
CVTSD2SS(XMM0, fpr.R(inst.RS));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());

gpr.UnlockAllX();
fpr.UnlockAll();
Expand Down Expand Up @@ -337,7 +337,7 @@ void Jit64::lfsx(UGeckoInstruction inst)

MEMCHECK_END
} else {
SafeLoadToEAX(R(EAX), 32, 0, false);
SafeLoadToEAX(R(EAX), 32, 0, RegistersInUse(), false);

MEMCHECK_START

Expand Down
25 changes: 21 additions & 4 deletions Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp
Expand Up @@ -77,6 +77,23 @@ struct RegInfo {
RegInfo(RegInfo&); // DO NOT IMPLEMENT
};

// Builds a bitmask from the allocator state in `R`: bit i is set when
// R.regs[i] is non-zero, and bit 16 + i is set when R.fregs[i] is non-zero.
// Without _M_X64 the result is always 0.
static u32 regsInUse(RegInfo& R) {
#ifdef _M_X64
    u32 inUse = 0;
    for (unsigned slot = 0; slot < MAX_NUMBER_OF_REGS; ++slot)
    {
        const u32 gprBit = (R.regs[slot] != 0) ? (1 << slot) : 0;
        const u32 fprBit = (R.fregs[slot] != 0) ? (1 << (16 + slot)) : 0;
        inUse |= gprBit | fprBit;
    }
    return inUse;
#else
    // The mask is not needed on this target.
    return 0;
#endif
}

static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) {
unsigned& info = R.IInfo[Op - R.FirstI];
if (info == 0) R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1);
Expand Down Expand Up @@ -634,7 +651,7 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
if (RI.MakeProfile) {
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
}
RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0);
RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI));
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(I));
}
Expand Down Expand Up @@ -1337,7 +1354,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
Jit->MOV(32, R(EAX), loc1);
}
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI));
if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8)
Expand Down Expand Up @@ -1400,12 +1417,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
Jit->PSRLQ(XMM0, 32);
Jit->MOVD_xmm(R(EAX), XMM0);
Jit->MOV(32, R(ECX), address);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI));

Jit->MOVAPD(XMM0, value);
Jit->MOVD_xmm(R(EAX), XMM0);
Jit->MOV(32, R(ECX), address);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4, regsInUse(RI));
Jit->SetJumpTarget(exit);

if (RI.IInfo[I - RI.FirstI] & 4)
Expand Down
18 changes: 9 additions & 9 deletions Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp
Expand Up @@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);

RET();

Expand All @@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);

SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);

RET();

Expand All @@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOV(16, R(AX), M((char*)psTemp + 4));

BSWAP(32, EAX);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);

RET();

Expand All @@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOVD_xmm(R(EAX), XMM0);
BSWAP(32, EAX);
ROL(32, R(EAX), Imm8(16));
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);

RET();

Expand All @@ -295,7 +295,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {

// Easy!
const u8* storeSingleFloat = AlignCode4();
SafeWriteFloatToReg(XMM0, ECX, SAFE_WRITE_NO_FASTMEM);
SafeWriteFloatToReg(XMM0, ECX, 0, SAFE_WRITE_NO_FASTMEM);
RET();
/*
if (cpu_info.bSSSE3) {
Expand All @@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_255));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();

const u8* storeSingleS8 = AlignCode4();
Expand All @@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m128));
MINSS(XMM0, M((void *)&m_127));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();

const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
Expand All @@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_65535));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();

const u8* storeSingleS16 = AlignCode4();
Expand All @@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m32768));
MINSS(XMM0, M((void *)&m_32767));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();

singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
Expand Down

0 comments on commit ebe4448

Please sign in to comment.