Skip to content

Commit

Permalink
Merge pull request #1083 from FioraAeterna/lzcnt
Browse files Browse the repository at this point in the history
Add LZCNT support, use in cntlzw
  • Loading branch information
Sonicadvance1 committed Sep 14, 2014
2 parents 1f7871f + 40b18f0 commit 4e16abd
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 43 deletions.
1 change: 1 addition & 0 deletions Source/Core/Common/x64CPUDetect.cpp
Expand Up @@ -197,6 +197,7 @@ void CPUInfo::Detect()
// Check for more features.
__cpuid(cpu_id, 0x80000001);
if (cpu_id[2] & 1) bLAHFSAHF64 = true;
if ((cpu_id[2] >> 5) & 1) bLZCNT = true;
if ((cpu_id[3] >> 29) & 1) bLongMode = true;
}

Expand Down
17 changes: 16 additions & 1 deletion Source/Core/Common/x64Emitter.cpp
Expand Up @@ -750,12 +750,14 @@ void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);}
void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);}

void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2)
void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
{
_assert_msg_(DYNA_REC, !src.IsImm(), "WriteBitSearchType - Imm argument");
src.operandReg = (u8)dest;
if (bits == 16)
Write8(0x66);
if (rep)
Write8(0xF3);
src.WriteRex(this, bits, bits);
Write8(0x0F);
Write8(byte2);
Expand All @@ -772,6 +774,19 @@ void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
// BSF: searches from the bottom bit to the top bit (second opcode byte 0xBC).
void XEmitter::BSF(int bits, X64Reg dest, OpArg src)
{
	WriteBitSearchType(bits, dest, src, 0xBC);
}
// BSR: searches from the top bit to the bottom bit (second opcode byte 0xBD).
void XEmitter::BSR(int bits, X64Reg dest, OpArg src)
{
	WriteBitSearchType(bits, dest, src, 0xBD);
}

// Emits TZCNT dest, src.
// The encoding reuses the BSF opcode byte (0xBC) with the rep flag set, which
// makes WriteBitSearchType put an 0xF3 prefix in front of it.
// TZCNT is gated on the BMI1 capability flag: when the host lacks it a panic
// alert is raised, but the guard does not skip the emission that follows.
void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
{
	const bool host_has_bmi1 = cpu_info.bBMI1;
	if (!host_has_bmi1)
	{
		PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
	}

	WriteBitSearchType(bits, dest, src, 0xBC, true);
}
// Emits LZCNT dest, src.
// The encoding reuses the BSR opcode byte (0xBD) with the rep flag set, which
// makes WriteBitSearchType put an 0xF3 prefix in front of it.
// Gated on the LZCNT capability flag: when the host lacks it a panic alert is
// raised, but the guard does not skip the emission that follows.
void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
{
	const bool host_has_lzcnt = cpu_info.bLZCNT;
	if (!host_has_lzcnt)
	{
		PanicAlert("Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
	}

	WriteBitSearchType(bits, dest, src, 0xBD, true);
}

void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
{
_assert_msg_(DYNA_REC, !src.IsImm(), "MOVSX - Imm argument");
Expand Down
7 changes: 6 additions & 1 deletion Source/Core/Common/x64Emitter.h
Expand Up @@ -266,7 +266,7 @@ class XEmitter
void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
void WriteMulDivType(int bits, OpArg src, int ext);
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2);
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
void WriteMXCSR(OpArg arg, int ext);
Expand Down Expand Up @@ -454,6 +454,11 @@ class XEmitter
// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
void MOVBE(int dbits, const OpArg& dest, const OpArg& src);

// Leading-zero count. Available only on AMD >= Phenom or Intel >= Haswell.
// Test with cpu_info.bLZCNT before emitting; the emitter raises a panic alert otherwise.
void LZCNT(int bits, X64Reg dest, OpArg src);
// Trailing-zero count. Note: this one is actually part of BMI1.
// Test with cpu_info.bBMI1 before emitting; the emitter raises a panic alert otherwise.
void TZCNT(int bits, X64Reg dest, OpArg src);

// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
void STMXCSR(OpArg memloc);
void LDMXCSR(OpArg memloc);
Expand Down
20 changes: 13 additions & 7 deletions Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
Expand Up @@ -1905,13 +1905,19 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
else
{
gpr.Lock(a, s);
gpr.KillImmediate(s, true, false);
gpr.BindToRegister(a, (a == s), true);
BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s));
FixupBranch gotone = J_CC(CC_NZ);
MOV(32, gpr.R(a), Imm32(63));
SetJumpTarget(gotone);
XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
gpr.BindToRegister(a, a == s, true);
if (cpu_info.bLZCNT)
{
LZCNT(32, gpr.RX(a), gpr.R(s));
}
else
{
BSR(32, gpr.RX(a), gpr.R(s));
FixupBranch gotone = J_CC(CC_NZ);
MOV(32, gpr.R(a), Imm32(63));
SetJumpTarget(gotone);
XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
}
gpr.UnlockAll();
}

Expand Down
62 changes: 28 additions & 34 deletions Source/UnitTests/Common/x64EmitterTest.cpp
Expand Up @@ -318,41 +318,35 @@ TEST_F(x64EmitterTest, CMOVcc_Register)
}
}

// Checks the BSF encodings with R12 as destination at every operand size.
TEST_F(x64EmitterTest, BSF)
{
	// Widest first, so the emission order lines up with the expected listing.
	const int operand_widths[] = { 64, 32, 16 };

	// Register source: RAX.
	for (int width : operand_widths)
		emitter->BSF(width, R12, R(RAX));

	// Memory source: [RAX].
	for (int width : operand_widths)
		emitter->BSF(width, R12, MatR(RAX));

	ExpectDisassembly("bsf r12, rax "
	                  "bsf r12d, eax "
	                  "bsf r12w, ax "
	                  "bsf r12, qword ptr ds:[rax] "
	                  "bsf r12d, dword ptr ds:[rax] "
	                  "bsf r12w, word ptr ds:[rax]");
}
// BITSEARCH_TEST(Name) defines a TEST_F(x64EmitterTest, Name) for an emitter
// method taking (bits, dest register, source operand).
//
// For each operand size (16, 32, 64) and each register in the matching
// reg16names / reg32names / reg64names table, it emits three forms:
//   Name reg, RAX     (register source)
//   Name RAX, reg     (register source, operands swapped)
//   Name reg, [RAX]   (memory source)
// and checks the disassembly of those three instructions before moving on to
// the next register.
//
// The expected mnemonic is the stringized macro argument exactly as written
// at the call site (e.g. "BSR"). NOTE(review): presumably ExpectDisassembly
// compares case-insensitively or normalizes case - confirm.
//
// The expansion ends with the closing brace of the test body, so a ';' after
// an invocation is just an empty declaration.
#define BITSEARCH_TEST(Name) \
TEST_F(x64EmitterTest, Name) \
{ \
struct { \
int bits; \
std::vector<NamedReg> regs; \
std::string size; \
std::string rax_name; \
} regsets[] = { \
{ 16, reg16names, "word", "ax" }, \
{ 32, reg32names, "dword", "eax" }, \
{ 64, reg64names, "qword", "rax" }, \
}; \
for (const auto& regset : regsets) \
for (const auto& r : regset.regs) \
{ \
emitter->Name(regset.bits, r.reg, R(RAX)); \
emitter->Name(regset.bits, RAX, R(r.reg)); \
emitter->Name(regset.bits, r.reg, MatR(RAX)); \
ExpectDisassembly(#Name " " + r.name + ", " + regset.rax_name + " " \
#Name " " + regset.rax_name + ", " + r.name + " " \
#Name " " + r.name + ", " + regset.size + " ptr ds:[rax] " ); \
} \
}

// Checks the BSR encodings with R12 as destination at every operand size.
TEST_F(x64EmitterTest, BSR)
{
	// Widest first, so the emission order lines up with the expected listing.
	const int operand_widths[] = { 64, 32, 16 };

	// Register source: RAX.
	for (int width : operand_widths)
		emitter->BSR(width, R12, R(RAX));

	// Memory source: [RAX].
	for (int width : operand_widths)
		emitter->BSR(width, R12, MatR(RAX));

	ExpectDisassembly("bsr r12, rax "
	                  "bsr r12d, eax "
	                  "bsr r12w, ax "
	                  "bsr r12, qword ptr ds:[rax] "
	                  "bsr r12d, dword ptr ds:[rax] "
	                  "bsr r12w, word ptr ds:[rax]");
}
// Instantiate the BITSEARCH_TEST test body once per emitter method that takes
// (bits, dest register, source operand).
BITSEARCH_TEST(BSR);
BITSEARCH_TEST(BSF);
BITSEARCH_TEST(LZCNT);
BITSEARCH_TEST(TZCNT);

TEST_F(x64EmitterTest, PREFETCH)
{
Expand Down

0 comments on commit 4e16abd

Please sign in to comment.