Skip to content
Permalink
Browse files

x64Emitter: Ensure op-JCC pairs have appropriate padding beforehand

  • Loading branch information
MerryMage committed Nov 19, 2019
1 parent 701212e commit 42c03165fb4f3a4ad7006d15b8828f8db8551d83
Showing with 112 additions and 12 deletions.
  1. +85 −12 Source/Core/Common/x64Emitter.cpp
  2. +23 −0 Source/Core/Common/x64Emitter.h
  3. +4 −0 Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -426,6 +426,73 @@ void XEmitter::AddJccErratumPadding(size_t instruction_size)
NOP(remaining);
}

// Inserts padding ahead of a conditional jump of `jcc_size` bytes that is about to be
// written at `code`, on CPUs flagged with cpu_info.bJccErratum.
//
// If the instruction recorded by EmitJccFusableInstruction ends exactly at `code` and its
// fusion class accepts `cc`, the op and the jump are treated as one unit: when that unit
// would reach the end of the current 32-byte window, the already-emitted op is moved
// forward to the start of the next window and the gap is filled with NOPs. Otherwise the
// jump is padded on its own via AddJccErratumPadding.
//
// cc:       condition code of the jump about to be emitted.
// jcc_size: encoded size in bytes of that jump (2 or 6 at the visible call sites).
//
// NOTE(review): the op is relocated with a plain byte copy. If an op with a
// position-dependent encoding (e.g. a RIP-relative operand) can reach this path, its
// displacement would no longer be correct after the move - confirm against WriteNormalOp.
void XEmitter::AddJccErratumPaddingFusable(CCFlags cc, size_t jcc_size)
{
  if (!cpu_info.bJccErratum)
  {
    return;
  }

  const bool is_fusable = [this, cc] {
    // Anything emitted after the tracked instruction breaks adjacency with the jump.
    if (code != jcc_fusion_last_inst_end)
    {
      return false;
    }

    switch (jcc_fusion_type)
    {
    case JccFusionType::All:
      return true;
    case JccFusionType::Numeric:
      switch (cc)
      {
      case CC_B:
      case CC_AE:
      case CC_E:
      case CC_NE:
      case CC_BE:
      case CC_A:
      case CC_L:
      case CC_GE:
      case CC_LE:
      case CC_G:
        return true;
      case CC_O:
      case CC_NO:
      case CC_S:
      case CC_NS:
      case CC_P:
      case CC_NP:
        return false;
      }
      // A condition code outside the lists above is treated as not fusable. This is stated
      // explicitly instead of relying on fallthrough into the next label.
      return false;
    case JccFusionType::None:
      return false;
    }
    return false;
  }();

  if (!is_fusable)
  {
    AddJccErratumPadding(jcc_size);
    return;
  }

  const size_t op_size =
      static_cast<size_t>(jcc_fusion_last_inst_end - jcc_fusion_last_inst_start);

  // Size of the aligned window the op+Jcc pair must stay strictly inside.
  constexpr u64 cache_line_size = 32;  // bytes
  const u64 remaining =
      cache_line_size - (reinterpret_cast<u64>(jcc_fusion_last_inst_start) & (cache_line_size - 1));

  // Strictly greater: a pair whose last byte is the last byte of the window is padded too.
  if (remaining > op_size + jcc_size)
  {
    return;
  }

  // Slide the op forward to the next window start, then fill the bytes it vacated with NOPs.
  // memmove is required because source and destination ranges may overlap.
  std::memmove(jcc_fusion_last_inst_start + remaining, jcc_fusion_last_inst_start, op_size);
  code = jcc_fusion_last_inst_start;
  NOP(remaining);

  // Keep the tracked span pointing at the relocated op so that a repeated query before the
  // jump is written (as the J_CC overload taking an address does) still sees accurate state.
  jcc_fusion_last_inst_start = code;
  code += op_size;
  jcc_fusion_last_inst_end = code;
}

void XEmitter::JMP(const u8* addr, bool force5Bytes)
{
u64 fn = (u64)addr;
@@ -559,14 +626,14 @@ FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes)
branch.type = force5bytes ? FixupBranch::Type::Branch32Bit : FixupBranch::Type::Branch8Bit;
if (!force5bytes)
{
AddJccErratumPadding(2);
AddJccErratumPaddingFusable(conditionCode, 2);
// 8 bits will do
Write8(0x70 + conditionCode);
Write8(0);
}
else
{
AddJccErratumPadding(6);
AddJccErratumPaddingFusable(conditionCode, 6);
Write8(0x0F);
Write8(0x80 + conditionCode);
Write32(0);
@@ -577,14 +644,14 @@ FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes)

void XEmitter::J_CC(CCFlags conditionCode, const u8* addr)
{
AddJccErratumPadding(2);
AddJccErratumPaddingFusable(conditionCode, 2);

u64 fn = (u64)addr;
s64 distance = (s64)(fn - ((u64)code + 2));

if (distance < -0x80 || distance >= 0x80)
{
AddJccErratumPadding(6);
AddJccErratumPaddingFusable(conditionCode, 6);
distance = (s64)(fn - ((u64)code + 6));
ASSERT_MSG(DYNA_REC, distance >= -0x80000000LL && distance < 0x80000000LL,
"Jump target too far away, needs indirect register");
@@ -594,7 +661,6 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8* addr)
}
else
{
AddJccErratumPadding(2);
Write8(0x70 + conditionCode);
Write8((u8)(s8)distance);
}
@@ -1620,7 +1686,8 @@ void XEmitter::WriteNormalOp(int bits, NormalOp op, const OpArg& a1, const OpArg
// Emits ADD via WriteNormalOp and records it as a JccFusionType::Numeric candidate for
// the fusable Jcc padding logic.
// NOTE(review): this listing is a commit diff rendered without +/- markers. The bare
// WriteNormalOp call below appears to be the removed pre-change line and the
// EmitJccFusableInstruction call its replacement - confirm against the real file.
void XEmitter::ADD(int bits, const OpArg& a1, const OpArg& a2)
{
CheckFlags();
WriteNormalOp(bits, NormalOp::ADD, a1, a2);
EmitJccFusableInstruction(JccFusionType::Numeric,
[&] { WriteNormalOp(bits, NormalOp::ADD, a1, a2); });
}
void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2)
{
@@ -1630,7 +1697,8 @@ void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2)
// Emits SUB via WriteNormalOp and records it as a JccFusionType::Numeric candidate for
// the fusable Jcc padding logic.
// NOTE(review): this listing is a commit diff rendered without +/- markers. The bare
// WriteNormalOp call below appears to be the removed pre-change line and the
// EmitJccFusableInstruction call its replacement - confirm against the real file.
void XEmitter::SUB(int bits, const OpArg& a1, const OpArg& a2)
{
CheckFlags();
WriteNormalOp(bits, NormalOp::SUB, a1, a2);
EmitJccFusableInstruction(JccFusionType::Numeric,
[&] { WriteNormalOp(bits, NormalOp::SUB, a1, a2); });
}
void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2)
{
@@ -1640,7 +1708,8 @@ void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2)
// Emits AND via WriteNormalOp and records it as a JccFusionType::All candidate, i.e. one
// the padding logic pairs with a following Jcc regardless of condition code.
// NOTE(review): this listing is a commit diff rendered without +/- markers. The bare
// WriteNormalOp call below appears to be the removed pre-change line and the
// EmitJccFusableInstruction call its replacement - confirm against the real file.
void XEmitter::AND(int bits, const OpArg& a1, const OpArg& a2)
{
CheckFlags();
WriteNormalOp(bits, NormalOp::AND, a1, a2);
EmitJccFusableInstruction(JccFusionType::All,
[&] { WriteNormalOp(bits, NormalOp::AND, a1, a2); });
}
void XEmitter::OR(int bits, const OpArg& a1, const OpArg& a2)
{
@@ -1667,12 +1736,14 @@ void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2)
// Emits TEST via WriteNormalOp and records it as a JccFusionType::All candidate, i.e. one
// the padding logic pairs with a following Jcc regardless of condition code.
// NOTE(review): this listing is a commit diff rendered without +/- markers. The bare
// WriteNormalOp call below appears to be the removed pre-change line and the
// EmitJccFusableInstruction call its replacement - confirm against the real file.
void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2)
{
CheckFlags();
WriteNormalOp(bits, NormalOp::TEST, a1, a2);
EmitJccFusableInstruction(JccFusionType::All,
[&] { WriteNormalOp(bits, NormalOp::TEST, a1, a2); });
}
// Emits CMP via WriteNormalOp and records it as a JccFusionType::Numeric candidate for
// the fusable Jcc padding logic.
// NOTE(review): this listing is a commit diff rendered without +/- markers. The bare
// WriteNormalOp call below appears to be the removed pre-change line and the
// EmitJccFusableInstruction call its replacement - confirm against the real file.
void XEmitter::CMP(int bits, const OpArg& a1, const OpArg& a2)
{
CheckFlags();
WriteNormalOp(bits, NormalOp::CMP, a1, a2);
EmitJccFusableInstruction(JccFusionType::Numeric,
[&] { WriteNormalOp(bits, NormalOp::CMP, a1, a2); });
}
void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2)
{
@@ -1683,11 +1754,13 @@ void XEmitter::CMP_or_TEST(int bits, const OpArg& a1, const OpArg& a2)
CheckFlags();
if (a1.IsSimpleReg() && a2.IsZero()) // turn 'CMP reg, 0' into shorter 'TEST reg, reg'
{
WriteNormalOp(bits, NormalOp::TEST, a1, a1);
EmitJccFusableInstruction(JccFusionType::All,
[&] { WriteNormalOp(bits, NormalOp::TEST, a1, a1); });
}
else
{
WriteNormalOp(bits, NormalOp::CMP, a1, a2);
EmitJccFusableInstruction(JccFusionType::Numeric,
[&] { WriteNormalOp(bits, NormalOp::CMP, a1, a2); });
}
}

@@ -1171,6 +1171,29 @@ class XEmitter
auto trampoline = &XEmitter::CallLambdaTrampoline<T, Args...>;
ABI_CallFunctionPC(trampoline, reinterpret_cast<const void*>(f), p1);
}

private:
// Classifies the most recently tracked instruction by which conditional jumps
// AddJccErratumPaddingFusable will pad together with it.
enum class JccFusionType
{
// No tracked instruction; the initial value of jcc_fusion_type. Never paired with a Jcc.
None,
// Paired with a following Jcc of any condition code (used for AND and TEST).
All,
// Paired only with the condition codes listed below (used for ADD, SUB and CMP).
Numeric, // jz jl jg jc jb ja (and their inverses)
};

// Runs `fn`, which is expected to emit exactly one instruction at `code`, and records the
// byte range it occupied together with its fusion class. AddJccErratumPaddingFusable later
// compares `code` against the recorded end pointer to decide whether a Jcc immediately
// follows this instruction.
//
// type: fusion class to associate with the emitted instruction.
// fn:   callable taking no arguments that performs the emission.
template <typename Fn>
void EmitJccFusableInstruction(JccFusionType type, Fn&& fn)
{
jcc_fusion_type = type;
// Snapshot the write cursor before and after emission to capture the instruction's span.
jcc_fusion_last_inst_start = code;
fn();
jcc_fusion_last_inst_end = code;
}

// Pads ahead of a Jcc with condition code `cc`; `instruction_size` is the encoded size in
// bytes of the jump about to be written (2 or 6 at the visible call sites).
void AddJccErratumPaddingFusable(CCFlags cc, size_t instruction_size);

// State recorded by EmitJccFusableInstruction for the last instruction it emitted:
// its fusion class, and the values of `code` immediately before and after emission.
JccFusionType jcc_fusion_type = JccFusionType::None;
u8* jcc_fusion_last_inst_start = nullptr;
u8* jcc_fusion_last_inst_end = nullptr;
}; // class XEmitter

class X64CodeBlock : public Common::CodeBlock<XEmitter>
@@ -81,6 +81,10 @@ void Jit64AsmRoutineManager::Generate()
ResetStack(*this);

SUB(32, PPCSTATE(downcount), R(RSCRATCH2));
// Prevent op-Jcc fusion by inserting a NOP. If this NOP wasn't here, the above SUB may be
// `memmove`-d by the emitter and this would invalidate the below `dispatcher` variable.
// (See also: AddJccErratumPaddingFusable)
NOP();

dispatcher = GetCodePtr();
// Expected result of SUB(32, PPCSTATE(downcount), Imm32(block_cycles)) is in RFLAGS.

0 comments on commit 42c0316

Please sign in to comment.
You can’t perform that action at this time.