Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge branch '6663'
Should fix some fastmem-related bugs and possibly improve performance a bit.
  • Loading branch information
comex committed Oct 5, 2013
2 parents 8c103a8 + a51eb5f commit f6c0fb7
Show file tree
Hide file tree
Showing 45 changed files with 276 additions and 603 deletions.
1 change: 0 additions & 1 deletion Source/Core/Common/CMakeLists.txt
Expand Up @@ -37,7 +37,6 @@ else()
if(NOT _M_GENERIC) #X86
set(SRCS ${SRCS}
Src/x64FPURoundMode.cpp
Src/x64Thunk.cpp
)
endif()
set(SRCS ${SRCS} Src/x64CPUDetect.cpp)
Expand Down
2 changes: 0 additions & 2 deletions Source/Core/Common/Common.vcxproj
Expand Up @@ -217,7 +217,6 @@
<ClCompile Include="Src\x64CPUDetect.cpp" />
<ClCompile Include="Src\x64Emitter.cpp" />
<ClCompile Include="Src\x64FPURoundMode.cpp" />
<ClCompile Include="Src\x64Thunk.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Src\Atomic.h" />
Expand Down Expand Up @@ -263,7 +262,6 @@
<ClInclude Include="Src\SymbolDB.h" />
<ClInclude Include="Src\SysConf.h" />
<ClInclude Include="Src\Thread.h" />
<ClInclude Include="Src\Thunk.h" />
<ClInclude Include="Src\Timer.h" />
<ClInclude Include="Src\x64ABI.h" />
<ClInclude Include="Src\x64Analyzer.h" />
Expand Down
2 changes: 0 additions & 2 deletions Source/Core/Common/Common.vcxproj.filters
Expand Up @@ -47,7 +47,6 @@
<ClCompile Include="Src\x64ABI.cpp" />
<ClCompile Include="Src\x64CPUDetect.cpp" />
<ClCompile Include="Src\x64FPURoundMode.cpp" />
<ClCompile Include="Src\x64Thunk.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Src\Atomic.h" />
Expand Down Expand Up @@ -84,7 +83,6 @@
<ClInclude Include="Src\SymbolDB.h" />
<ClInclude Include="Src\SysConf.h" />
<ClInclude Include="Src\Thread.h" />
<ClInclude Include="Src\Thunk.h" />
<ClInclude Include="Src\Timer.h" />
<ClInclude Include="Src\x64Analyzer.h" />
<ClInclude Include="Src\x64Emitter.h" />
Expand Down
46 changes: 0 additions & 46 deletions Source/Core/Common/Src/Thunk.h

This file was deleted.

80 changes: 80 additions & 0 deletions Source/Core/Common/Src/x64ABI.cpp
Expand Up @@ -57,6 +57,86 @@ void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) {
}
}

// Emits code that saves the registers selected by `mask` and reserves an
// aligned stack frame.
//
// mask:     bits 0..15 select general-purpose registers (by X64Reg index),
//           which are PUSHed in ascending order; bits 16..31 select
//           XMM0..XMM15, which are stored with MOVAPD above the shadow area.
// noProlog: when true, one extra register-sized slot is accounted for in the
//           alignment padding calculation.
//
// The reserved frame is: padding to a 16-byte boundary + 16 bytes per selected
// XMM register + 0x20 bytes of shadow space on 64-bit Windows.
void XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog)
{
#ifdef _M_X64
	const int gprBytes = 8;
#else
	const int gprBytes = 4;
#endif
#if defined(_WIN32) && defined(_M_X64)
	const int shadowBytes = 0x20;
#else
	const int shadowBytes = 0;
#endif
	const u32 xmmBits = mask >> 16;

	// Push the selected GPRs, lowest index first, tracking the bytes consumed.
	int pushedBytes = 0;
	for (int gpr = 0; gpr < 16; ++gpr)
	{
		if (((mask >> gpr) & 1) == 0)
			continue;
		PUSH((X64Reg) gpr);
		pushedBytes += gprBytes;
	}

	// Padding that brings the stack pointer back to a 16-byte boundary.
	const int entryBytes = noProlog ? gprBytes : 0;
	int frameBytes = (-entryBytes - pushedBytes) & 0xf;

	// One 16-byte slot per selected XMM register, plus the shadow area.
	for (int xmm = 0; xmm < 16; ++xmm)
	{
		if ((xmmBits >> xmm) & 1)
			frameBytes += 16;
	}
	frameBytes += shadowBytes;

	if (frameBytes != 0)
		SUB(gprBytes * 8, R(RSP), frameBytes >= 0x80 ? Imm32(frameBytes) : Imm8(frameBytes));

	// Spill the selected XMM registers into consecutive slots above the shadow area.
	int slot = shadowBytes;
	for (int xmm = 0; xmm < 16; ++xmm)
	{
		if (((xmmBits >> xmm) & 1) == 0)
			continue;
		MOVAPD(MDisp(RSP, slot), (X64Reg) xmm);
		slot += 16;
	}
}

// Emits code that undoes ABI_PushRegistersAndAdjustStack for the same `mask`
// and `noProlog`: reloads the selected XMM registers (bits 16..31) with
// MOVAPD, releases the frame with a single ADD, then POPs the selected
// general-purpose registers (bits 0..15) in descending order.
void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog)
{
#ifdef _M_X64
	const int gprBytes = 8;
#else
	const int gprBytes = 4;
#endif
#if defined(_WIN32) && defined(_M_X64)
	int frameBytes = 0x20; // shadow area sits below the XMM slots
#else
	int frameBytes = 0;
#endif
	const u32 xmmBits = mask >> 16;

	// Reload XMM registers from consecutive 16-byte slots; frameBytes doubles
	// as the running slot offset.
	for (int xmm = 0; xmm < 16; ++xmm)
	{
		if (((xmmBits >> xmm) & 1) == 0)
			continue;
		MOVAPD((X64Reg) xmm, MDisp(RSP, frameBytes));
		frameBytes += 16;
	}

	// Recompute the alignment padding from the number of GPRs that were pushed.
	int pushedBytes = 0;
	for (int gpr = 0; gpr < 16; ++gpr)
	{
		if ((mask >> gpr) & 1)
			pushedBytes += gprBytes;
	}
	const int entryBytes = noProlog ? gprBytes : 0;
	frameBytes += (-entryBytes - pushedBytes) & 0xf;

	if (frameBytes != 0)
		ADD(gprBytes * 8, R(RSP), frameBytes >= 0x80 ? Imm32(frameBytes) : Imm8(frameBytes));

	// Restore GPRs in the reverse of the push order.
	for (int gpr = 15; gpr >= 0; --gpr)
	{
		if ((mask >> gpr) & 1)
			POP((X64Reg) gpr);
	}
}

#ifdef _M_IX86 // All32

// Shared code between Win32 and Unix32
Expand Down
13 changes: 12 additions & 1 deletion Source/Core/Common/Src/x64ABI.h
Expand Up @@ -43,6 +43,8 @@
// 32-bit bog standard cdecl, shared between linux and windows
// MacOSX 32-bit is the same as System V, with a few exceptions that we probably don't care much about.

// Register mask (bit n = X64Reg index n) covering EAX, ECX and EDX, the
// registers a cdecl callee is allowed to clobber. Despite the "callee saved"
// wording, these are the registers a caller must preserve around a call.
// Named ABI_ALL_CALLEE_SAVED to match the 64-bit definitions of this macro.
#define ABI_ALL_CALLEE_SAVED ((1 << EAX) | (1 << ECX) | (1 << EDX))
// Previous spelling, kept so existing users keep compiling.
#define ALL_CALLEE_SAVED ABI_ALL_CALLEE_SAVED

#else // 64 bit calling convention

#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
Expand All @@ -52,7 +54,12 @@
#define ABI_PARAM3 R8
#define ABI_PARAM4 R9

#else //64-bit Unix (hopefully MacOSX too)
// Register mask of the Win64 volatile registers, i.e. those a called function
// may clobber (despite the "callee saved" wording in the name).
// Layout matches ABI_PushRegistersAndAdjustStack: bits 0..15 select
// general-purpose registers and bits 16..31 select XMM0..XMM15. The emitter
// casts indices 0..15 to X64Reg for XMM registers, so the XMM enumerators must
// be shifted up by 16 here; otherwise they alias the low GPR bits and no XMM
// register is actually selected.
#define ABI_ALL_CALLEE_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << R8) | \
	(1 << R9) | (1 << R10) | (1 << R11) | \
	(1 << (16 + XMM0)) | (1 << (16 + XMM1)) | (1 << (16 + XMM2)) | \
	(1 << (16 + XMM3)) | (1 << (16 + XMM4)) | (1 << (16 + XMM5)))

#else //64-bit Unix / OS X

#define ABI_PARAM1 RDI
#define ABI_PARAM2 RSI
Expand All @@ -61,6 +68,10 @@
#define ABI_PARAM5 R8
#define ABI_PARAM6 R9

// Register mask for 64-bit Unix / OS X: bits 0..15 select general-purpose
// registers (RAX, RCX, RDX, RSI, RDI, R8..R11 here) and bits 16..31 select
// XMM0..XMM15, all sixteen of which are included.
#define ABI_ALL_CALLEE_SAVED ( \
	(0xffffu << 16) /* xmm0..15 */ | \
	(1 << RAX) | (1 << RCX) | (1 << RDX) | \
	(1 << RSI) | (1 << RDI) | \
	(1 << R8) | (1 << R9) | (1 << R10) | (1 << R11))

#endif // WIN32

#endif // X86
Expand Down
68 changes: 0 additions & 68 deletions Source/Core/Common/Src/x64Emitter.cpp
Expand Up @@ -1634,74 +1634,6 @@ void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u
CALLptr(M(impptr));
}

// Emits code that PUSHes the general-purpose registers selected by bits 0..15
// of `mask` (ascending), then reserves a frame holding 0x20 bytes of shadow
// space on Windows, one 16-byte slot per XMM register selected by bits 16..31,
// and 8 bytes of padding when an even number of GPRs was pushed. The selected
// XMM registers are then stored into the frame with MOVAPD.
void XEmitter::PushRegistersAndAlignStack(u32 mask)
{
#ifdef _WIN32
	const int homeArea = 0x20;
#else
	const int homeArea = 0;
#endif
	const u32 xmmMask = mask >> 16;

	// Only the parity of the push count matters for the padding below.
	bool oddPushCount = false;
	for (int gpr = 0; gpr < 16; ++gpr)
	{
		if (((mask >> gpr) & 1) == 0)
			continue;
		PUSH((X64Reg) gpr);
		oddPushCount = !oddPushCount;
	}

	int frame = oddPushCount ? 0 : 8;
	for (int xmm = 0; xmm < 16; ++xmm)
	{
		if ((xmmMask >> xmm) & 1)
			frame += 16;
	}
	frame += homeArea;

	if (frame != 0)
		SUB(64, R(RSP), frame >= 0x80 ? Imm32(frame) : Imm8(frame));

	// XMM slots start right above the shadow area.
	int slot = homeArea;
	for (int xmm = 0; xmm < 16; ++xmm)
	{
		if (((xmmMask >> xmm) & 1) == 0)
			continue;
		MOVAPD(MDisp(RSP, slot), (X64Reg) xmm);
		slot += 16;
	}
}

// Emits the inverse of PushRegistersAndAlignStack for the same `mask`:
// reloads the selected XMM registers (bits 16..31) with MOVAPD, releases the
// frame with one ADD, then POPs the selected general-purpose registers
// (bits 0..15) in descending order.
void XEmitter::PopRegistersAndAlignStack(u32 mask)
{
#ifdef _WIN32
	int frame = 0x20; // shadow area precedes the XMM slots
#else
	int frame = 0;
#endif
	const u32 xmmMask = mask >> 16;

	// `frame` doubles as the running offset of the next XMM slot.
	for (int xmm = 0; xmm < 16; ++xmm)
	{
		if (((xmmMask >> xmm) & 1) == 0)
			continue;
		MOVAPD((X64Reg) xmm, MDisp(RSP, frame));
		frame += 16;
	}

	// 8 bytes of padding were reserved when an even number of GPRs was pushed.
	bool oddPushCount = false;
	for (int gpr = 0; gpr < 16; ++gpr)
	{
		if ((mask >> gpr) & 1)
			oddPushCount = !oddPushCount;
	}
	if (!oddPushCount)
		frame += 8;

	if (frame != 0)
		ADD(64, R(RSP), frame >= 0x80 ? Imm32(frame) : Imm8(frame));

	for (int gpr = 15; gpr >= 0; --gpr)
	{
		if ((mask >> gpr) & 1)
			POP((X64Reg) gpr);
	}
}

#endif

}
7 changes: 4 additions & 3 deletions Source/Core/Common/Src/x64Emitter.h
Expand Up @@ -646,6 +646,10 @@ class XEmitter
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
void ABI_PopAllCalleeSavedRegsAndAdjustStack();

// A more flexible version of the above: `mask` selects which registers are
// saved/restored. Bits 0..15 pick general-purpose registers by X64Reg index
// (PUSHed ascending, POPped descending); bits 16..31 pick XMM0..XMM15, which
// are spilled to 16-byte stack slots with MOVAPD. `noProlog` adds one
// register-sized slot to the alignment padding calculation. The Pop call
// recomputes the frame size from its arguments, so pass it the same `mask`
// and `noProlog` as the matching Push call.
void ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog);
void ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog);

unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false);
void ABI_AlignStack(unsigned int frameSize, bool noProlog = false);
void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false);
Expand Down Expand Up @@ -691,9 +695,6 @@ class XEmitter

#define DECLARE_IMPORT(x) extern "C" void *__imp_##x

// Save/restore helpers driven by a register mask: bits 0..15 select
// general-purpose registers to PUSH/POP, bits 16..31 select XMM registers
// that are spilled to / reloaded from the stack with MOVAPD. Pass the same
// `mask` to both calls.
void PushRegistersAndAlignStack(u32 mask);
void PopRegistersAndAlignStack(u32 mask);

#endif
}; // class XEmitter

Expand Down

0 comments on commit f6c0fb7

Please sign in to comment.