Skip to content

Commit

Permalink
JIT: make instruction merging generic
Browse files Browse the repository at this point in the history
Now it should be easier to merge sequences longer than two instructions.
Also correct some minor inconsistencies in behavior between the
instruction-merging cases.
  • Loading branch information
FioraAeterna committed Jan 5, 2015
1 parent 76adfa3 commit 7e5b667
Show file tree
Hide file tree
Showing 12 changed files with 111 additions and 122 deletions.
22 changes: 7 additions & 15 deletions Source/Core/Core/PowerPC/Jit64/Jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,7 @@ void Jit64::Jit(u32 em_address)
jo.enableBlocklink = false;
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_FORWARD_JUMP);
}
Expand Down Expand Up @@ -643,7 +644,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);

js.skipnext = false;
js.skipInstructions = 0;
js.carryFlagSet = false;
js.carryFlagInverted = false;
branch_targets.clear();
Expand All @@ -658,27 +659,17 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc

if (i == (code_block.m_num_instructions - 1))
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
js.next_inst_bp = false;
if (Profiler::g_ProfileBlocks)
{
// WARNING - cmp->branch merging will screw this up.
PROFILER_VPUSH;
// get end tic
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
// tic counter += (end tic - start tic)
PROFILER_UPDATE_TIME(b);
PROFILER_VPOP;
}
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
js.next_op = &ops[i + 1];
js.next_inst_bp = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i + 1].address);
js.isLastInstruction = true;
}

if (analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_FORWARD_JUMP))
Expand Down Expand Up @@ -848,9 +839,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
//NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str());
}
#endif
if (js.skipnext)
while (js.skipInstructions > 0)
{
js.skipnext = false;
js.skipInstructions--;
i++; // Skip next instruction
}
}
Expand Down Expand Up @@ -911,6 +902,7 @@ void Jit64::EnableOptimization()
{
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_FORWARD_JUMP);
}
2 changes: 1 addition & 1 deletion Source/Core/Core/PowerPC/Jit64/Jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class Jit64 : public Jitx86Base
void GenerateConstantOverflow(bool overflow);
void GenerateConstantOverflow(s64 val);
void GenerateOverflow();
bool MergeAllowedNextInstruction();
bool MergeAllowedNextInstructions(int count);
void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
Expand Down
8 changes: 5 additions & 3 deletions Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,12 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
int output[4] = { CR_SO, CR_EQ, CR_GT, CR_LT };

// Merge neighboring fcmp and cror (the primary use of cror).
UGeckoInstruction next = js.next_inst;
if (MergeAllowedNextInstruction() && next.OPCD == 19 && next.SUBOP10 == 449 && (next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
UGeckoInstruction next = js.op[1].inst;
if (analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE) &&
MergeAllowedNextInstructions(1) && next.OPCD == 19 && next.SUBOP10 == 449 &&
(next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
{
js.skipnext = true;
js.skipInstructions = 1;
js.downcountAmount++;
int dst = 3 - (next.CRBD & 3);
output[3 - (next.CRBD & 3)] &= ~(1 << dst);
Expand Down
97 changes: 57 additions & 40 deletions Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,20 @@ void Jit64::GenerateOverflow()
SetJumpTarget(exit);
}

bool Jit64::MergeAllowedNextInstruction()
bool Jit64::MergeAllowedNextInstructions(int count)
{
if (PowerPC::GetState() == PowerPC::CPU_STEPPING || js.instructionsLeft < count)
return false;
// Be careful: a breakpoint kills flags in between instructions
return PowerPC::GetState() != PowerPC::CPU_STEPPING && !js.isLastInstruction && !js.next_inst_bp && !js.next_op->isBranchTarget;
for (int i = 1; i <= count; i++)
{
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address))
return false;
if (js.op[i].isBranchTarget)
return false;
}
return true;
}

void Jit64::FinalizeCarry(CCFlags cond)
Expand All @@ -62,7 +72,8 @@ void Jit64::FinalizeCarry(CCFlags cond)
js.carryFlagInverted = false;
if (js.op->wantsCA)
{
if (MergeAllowedNextInstruction() && js.next_op->wantsCAInFlags)
// Not actually merging instructions, but the effect is equivalent (we can't have breakpoints/etc in between).
if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
{
if (cond == CC_C || cond == CC_NC)
{
Expand Down Expand Up @@ -91,7 +102,7 @@ void Jit64::FinalizeCarry(bool ca)
js.carryFlagInverted = false;
if (js.op->wantsCA)
{
if (MergeAllowedNextInstruction() && js.next_op->wantsCAInFlags)
if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
{
if (ca)
STC();
Expand Down Expand Up @@ -336,10 +347,10 @@ bool Jit64::CheckMergedBranch(int crf)
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
return false;

if (!MergeAllowedNextInstruction())
if (!MergeAllowedNextInstructions(1))
return false;

const UGeckoInstruction& next = js.next_inst;
const UGeckoInstruction& next = js.op[1].inst;
return (((next.OPCD == 16 /* bcx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) &&
Expand All @@ -351,33 +362,35 @@ bool Jit64::CheckMergedBranch(int crf)
void Jit64::DoMergedBranch()
{
// Code that handles successful PPC branching.
if (js.next_inst.OPCD == 16) // bcx
const UGeckoInstruction& next = js.op[1].inst;
const u32 nextPC = js.op[1].address;
if (next.OPCD == 16) // bcx
{
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));

u32 destination;
if (js.next_inst.AA)
destination = SignExt16(js.next_inst.BD << 2);
if (next.AA)
destination = SignExt16(next.BD << 2);
else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4);
destination = nextPC + SignExt16(next.BD << 2);
WriteExit(destination, next.LK, nextPC + 4);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
else if ((next.OPCD == 19) && (next.SUBOP10 == 528)) // bcctrx
{
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));
MOV(32, R(RSCRATCH), M(&CTR));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4);
WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
else if ((next.OPCD == 19) && (next.SUBOP10 == 16)) // bclrx
{
MOV(32, R(RSCRATCH), M(&LR));
if (!m_enable_blr_optimization)
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));
WriteBLRExit();
}
else
Expand All @@ -389,21 +402,23 @@ void Jit64::DoMergedBranch()
void Jit64::DoMergedBranchCondition()
{
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
js.skipInstructions = 1;
const UGeckoInstruction& next = js.op[1].inst;
const u32 nextPC = js.op[1].address;
int test_bit = 8 >> (next.BI & 3);
bool cc = analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
bool forwardJumps = analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_FORWARD_JUMP);
bool jumpInBlock = false;
u32 destination;
if (js.next_inst.OPCD == 16 && cc && forwardJumps && !(test_bit & 1))
if (next.OPCD == 16 && cc && forwardJumps && !(test_bit & 1))
{
if (js.next_inst.AA)
destination = SignExt16(js.next_inst.BD << 2);
if (next.AA)
destination = SignExt16(next.BD << 2);
else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
jumpInBlock = destination > js.next_compilerPC && destination < js.blockEnd;
destination = nextPC + SignExt16(next.BD << 2);
jumpInBlock = destination > nextPC && destination < js.blockEnd;
}
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE) ^ jumpInBlock;
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE) ^ jumpInBlock;

gpr.UnlockAll();
gpr.UnlockAllX();
Expand All @@ -419,7 +434,7 @@ void Jit64::DoMergedBranchCondition()

if (jumpInBlock)
{
BranchTarget branchData = { pBranch, pBranch, js.downcountAmount, js.fifoBytesThisBlock, js.firstFPInstructionFound, gpr, fpr, js.next_op };
BranchTarget branchData = { pBranch, pBranch, js.downcountAmount, js.fifoBytesThisBlock, js.firstFPInstructionFound, gpr, fpr, &js.op[1] };
branch_targets.insert(std::make_pair(destination, branchData));
}
else
Expand All @@ -436,28 +451,30 @@ void Jit64::DoMergedBranchCondition()
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
WriteExit(nextPC + 4);
}
}

void Jit64::DoMergedBranchImmediate(s64 val)
{
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
js.skipInstructions = 1;
const UGeckoInstruction& next = js.op[1].inst;
const u32 nextPC = js.op[1].address;
int test_bit = 8 >> (next.BI & 3);
bool cc = analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
bool forwardJumps = analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_FORWARD_JUMP);
bool jumpInBlock = false;
u32 destination;
if (js.next_inst.OPCD == 16 && cc && forwardJumps)
if (next.OPCD == 16 && cc && forwardJumps)
{
if (js.next_inst.AA)
destination = SignExt16(js.next_inst.BD << 2);
if (next.AA)
destination = SignExt16(next.BD << 2);
else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
jumpInBlock = destination > js.next_compilerPC && destination < js.blockEnd;
destination = nextPC + SignExt16(next.BD << 2);
jumpInBlock = destination > nextPC && destination < js.blockEnd;
}
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);

gpr.UnlockAll();
gpr.UnlockAllX();
Expand All @@ -476,7 +493,7 @@ void Jit64::DoMergedBranchImmediate(s64 val)
if (jumpInBlock)
{
FixupBranch pBranch = J(true);
BranchTarget branchData = { pBranch, pBranch, js.downcountAmount, js.fifoBytesThisBlock, js.firstFPInstructionFound, gpr, fpr, js.next_op };
BranchTarget branchData = { pBranch, pBranch, js.downcountAmount, js.fifoBytesThisBlock, js.firstFPInstructionFound, gpr, fpr, &js.op[1] };
branch_targets.insert(std::make_pair(destination, branchData));
}
// IMPORTANT: we can't actually leave the block in this case!! A forward branch is still waiting around for
Expand All @@ -499,7 +516,7 @@ void Jit64::DoMergedBranchImmediate(s64 val)
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
WriteExit(nextPC + 4);
}
}

Expand Down
6 changes: 3 additions & 3 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
}

// PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find it.
if (MergeAllowedNextInstruction() && accessSize == 8 && js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 954 &&
js.next_inst.RS == inst.RD && js.next_inst.RA == inst.RD && !js.next_inst.Rc)
if (MergeAllowedNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 && js.op[1].inst.SUBOP10 == 954 &&
js.op[1].inst.RS == inst.RD && js.op[1].inst.RA == inst.RD && !js.op[1].inst.Rc)
{
js.downcountAmount++;
js.skipnext = true;
js.skipInstructions = 1;
signExtend = true;
}

Expand Down
59 changes: 30 additions & 29 deletions Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,38 +247,38 @@ void Jit64::mfspr(UGeckoInstruction inst)
ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));

// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can.
u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F);
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
int n = js.next_inst.RD;
if (js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) &&
PowerPC::GetState() != PowerPC::CPU_STEPPING && n != d)
if (MergeAllowedNextInstructions(1))
{
js.downcountAmount++;
js.skipnext = true;
gpr.Lock(d, n);
gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX));
}
else
{
gpr.Lock(d);
gpr.BindToRegister(d, false);
if (iIndex == SPR_TU)
const UGeckoInstruction& next = js.op[1].inst;
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can.
u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F);
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
int n = next.RD;
if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d)
{
js.downcountAmount++;
js.skipInstructions = 1;
gpr.Lock(d, n);
gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX));
break;
}
}
gpr.UnlockAllX();
gpr.Lock(d);
gpr.BindToRegister(d, false);
if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX));
break;
}
case SPR_XER:
Expand Down Expand Up @@ -306,6 +306,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
break;
}
gpr.UnlockAllX();
gpr.UnlockAll();
}

Expand Down

0 comments on commit 7e5b667

Please sign in to comment.