Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Generic instruction merging #1834

Merged
merged 1 commit into from
Jan 11, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 7 additions & 18 deletions Source/Core/Core/PowerPC/Jit64/Jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,7 @@ void Jit64::Jit(u32 em_address)
jo.enableBlocklink = false;
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
}
Trace();
Expand Down Expand Up @@ -603,7 +604,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);

js.skipnext = false;
js.skipInstructions = 0;
js.carryFlagSet = false;
js.carryFlagInverted = false;
js.assumeNoPairedQuantize = false;
Expand Down Expand Up @@ -651,27 +652,17 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc

if (i == (code_block.m_num_instructions - 1))
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
js.next_inst_bp = false;
if (Profiler::g_ProfileBlocks)
{
// WARNING - cmp->branch merging will screw this up.
PROFILER_VPUSH;
// get end tic
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
// tic counter += (end tic - start tic)
PROFILER_UPDATE_TIME(b);
PROFILER_VPOP;
}
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
js.next_op = &ops[i + 1];
js.next_inst_bp = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i + 1].address);
js.isLastInstruction = true;
}

if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
Expand Down Expand Up @@ -856,11 +847,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
//NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str());
}
#endif
if (js.skipnext)
{
js.skipnext = false;
i++; // Skip next instruction
}
i += js.skipInstructions;
js.skipInstructions = 0;
}

u32 function = HLE::GetFunctionIndex(js.blockStart);
Expand Down Expand Up @@ -919,5 +907,6 @@ void Jit64::EnableOptimization()
{
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
}
1 change: 1 addition & 0 deletions Source/Core/Core/PowerPC/Jit64/Jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ class Jit64 : public Jitx86Base
void GenerateConstantOverflow(bool overflow);
void GenerateConstantOverflow(s64 val);
void GenerateOverflow();
bool MergeAllowedNextInstructions(int count);
void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
Expand Down
8 changes: 5 additions & 3 deletions Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,12 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
int output[4] = { CR_SO, CR_EQ, CR_GT, CR_LT };

// Merge neighboring fcmp and cror (the primary use of cror).
UGeckoInstruction next = js.next_inst;
if (next.OPCD == 19 && next.SUBOP10 == 449 && (next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
UGeckoInstruction next = js.op[1].inst;
if (analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE) &&
MergeAllowedNextInstructions(1) && next.OPCD == 19 && next.SUBOP10 == 449 &&
(next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
{
js.skipnext = true;
js.skipInstructions = 1;
js.downcountAmount++;
int dst = 3 - (next.CRBD & 3);
output[3 - (next.CRBD & 3)] &= ~(1 << dst);
Expand Down
77 changes: 51 additions & 26 deletions Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,30 @@ void Jit64::GenerateOverflow()
SetJumpTarget(exit);
}

// Reports whether the `count` instructions following the current one
// (js.op[1] .. js.op[count]) may be folded into the instruction currently
// being compiled.
//
// Returns false when the CPU is in stepping mode, when js.instructionsLeft is
// smaller than `count`, or when any of those following instructions either
// sits on a breakpoint (only checked when debugging is enabled) or is flagged
// as a branch target. Returns true otherwise.
bool Jit64::MergeAllowedNextInstructions(int count)
{
    // Never merge while single-stepping.
    if (PowerPC::GetState() == PowerPC::CPU_STEPPING)
        return false;

    // Not enough instructions remaining to cover the requested window.
    if (js.instructionsLeft < count)
        return false;

    for (int offset = 1; offset <= count; ++offset)
    {
        // Be careful: a breakpoint kills flags in between instructions, so an
        // instruction with a breakpoint on it cannot be merged.
        const bool hitsBreakpoint =
            SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
            PowerPC::breakpoints.IsAddressBreakPoint(js.op[offset].address);

        // A branch target must stay a separate instruction as well
        // (presumably because other code may jump straight to it -- confirm
        // against the analyzer that sets isBranchTarget).
        if (hitsBreakpoint || js.op[offset].isBranchTarget)
            return false;
    }

    return true;
}

void Jit64::FinalizeCarry(CCFlags cond)
{
js.carryFlagSet = false;
js.carryFlagInverted = false;
if (js.op->wantsCA)
{
// Be careful: a breakpoint kills flags in between instructions
if (!js.isLastInstruction && js.next_op->wantsCAInFlags && !js.next_inst_bp)
// Not actually merging instructions, but the effect is equivalent (we can't have breakpoints/etc in between).
if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
{
if (cond == CC_C || cond == CC_NC)
{
Expand Down Expand Up @@ -86,7 +102,7 @@ void Jit64::FinalizeCarry(bool ca)
js.carryFlagInverted = false;
if (js.op->wantsCA)
{
if (!js.isLastInstruction && js.next_op->wantsCAInFlags && !js.next_inst_bp)
if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
{
if (ca)
STC();
Expand Down Expand Up @@ -331,7 +347,10 @@ bool Jit64::CheckMergedBranch(int crf)
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
return false;

const UGeckoInstruction& next = js.next_inst;
if (!MergeAllowedNextInstructions(1))
return false;

const UGeckoInstruction& next = js.op[1].inst;
return (((next.OPCD == 16 /* bcx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) &&
Expand All @@ -343,33 +362,35 @@ bool Jit64::CheckMergedBranch(int crf)
void Jit64::DoMergedBranch()
{
// Code that handles successful PPC branching.
if (js.next_inst.OPCD == 16) // bcx
const UGeckoInstruction& next = js.op[1].inst;
const u32 nextPC = js.op[1].address;
if (next.OPCD == 16) // bcx
{
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));

u32 destination;
if (js.next_inst.AA)
destination = SignExt16(js.next_inst.BD << 2);
if (next.AA)
destination = SignExt16(next.BD << 2);
else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4);
destination = nextPC + SignExt16(next.BD << 2);
WriteExit(destination, next.LK, nextPC + 4);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
else if ((next.OPCD == 19) && (next.SUBOP10 == 528)) // bcctrx
{
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));
MOV(32, R(RSCRATCH), M(&CTR));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4);
WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
else if ((next.OPCD == 19) && (next.SUBOP10 == 16)) // bclrx
{
MOV(32, R(RSCRATCH), M(&LR));
if (!m_enable_blr_optimization)
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));
WriteBLRExit();
}
else
Expand All @@ -381,9 +402,11 @@ void Jit64::DoMergedBranch()
void Jit64::DoMergedBranchCondition()
{
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
js.skipInstructions = 1;
const UGeckoInstruction& next = js.op[1].inst;
int test_bit = 8 >> (next.BI & 3);
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
const u32 nextPC = js.op[1].address;

gpr.UnlockAll();
gpr.UnlockAllX();
Expand All @@ -408,16 +431,18 @@ void Jit64::DoMergedBranchCondition()
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
WriteExit(nextPC + 4);
}
}

void Jit64::DoMergedBranchImmediate(s64 val)
{
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
js.skipInstructions = 1;
const UGeckoInstruction& next = js.op[1].inst;
int test_bit = 8 >> (next.BI & 3);
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
const u32 nextPC = js.op[1].address;

gpr.UnlockAll();
gpr.UnlockAllX();
Expand All @@ -441,7 +466,7 @@ void Jit64::DoMergedBranchImmediate(s64 val)
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
WriteExit(nextPC + 4);
}
}

Expand Down
13 changes: 5 additions & 8 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,12 @@ void Jit64::lXXx(UGeckoInstruction inst)
}

// PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find it.
if (accessSize == 8 && js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 954 &&
js.next_inst.RS == inst.RD && js.next_inst.RA == inst.RD && !js.next_inst.Rc)
if (MergeAllowedNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 && js.op[1].inst.SUBOP10 == 954 &&
js.op[1].inst.RS == inst.RD && js.op[1].inst.RA == inst.RD && !js.op[1].inst.Rc)
{
if (PowerPC::GetState() != PowerPC::CPU_STEPPING)
{
js.downcountAmount++;
js.skipnext = true;
signExtend = true;
}
js.downcountAmount++;
js.skipInstructions = 1;
signExtend = true;
}

// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
Expand Down
59 changes: 30 additions & 29 deletions Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,38 +282,38 @@ void Jit64::mfspr(UGeckoInstruction inst)
ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));

// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can.
u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F);
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
int n = js.next_inst.RD;
if (js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) &&
PowerPC::GetState() != PowerPC::CPU_STEPPING && n != d)
if (MergeAllowedNextInstructions(1))
{
js.downcountAmount++;
js.skipnext = true;
gpr.Lock(d, n);
gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX));
}
else
{
gpr.Lock(d);
gpr.BindToRegister(d, false);
if (iIndex == SPR_TU)
const UGeckoInstruction& next = js.op[1].inst;
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can.
u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F);
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
int n = next.RD;
if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d)
{
js.downcountAmount++;
js.skipInstructions = 1;
gpr.Lock(d, n);
gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX));
break;
}
}
gpr.UnlockAllX();
gpr.Lock(d);
gpr.BindToRegister(d, false);
if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX));
break;
}
case SPR_XER:
Expand Down Expand Up @@ -341,6 +341,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
break;
}
gpr.UnlockAllX();
gpr.UnlockAll();
}

Expand Down
9 changes: 0 additions & 9 deletions Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -610,16 +610,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.downcountAmount += opinfo->numCycles;

if (i == (code_block.m_num_instructions - 1))
{
js.isLastInstruction = true;
js.next_inst = 0;
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
}

u32 function = HLE::GetFunctionIndex(ops[i].address);
if (function != 0)
Expand Down
9 changes: 1 addition & 8 deletions Source/Core/Core/PowerPC/JitArm32/Jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);

js.skipnext = false;
js.skipInstructions = 0;
js.compilerPC = nextPC;

// Translate instructions
Expand All @@ -459,13 +459,6 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
}

if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
Expand Down