diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index 8812bceb7e0e..d8c315016c72 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -1010,9 +1010,9 @@ void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) { EncodeData3SrcInst(3, Rd, Rn, Rm, Ra); } -void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { - EncodeData3SrcInst(4, Rd, Rn, Rm, Ra); + EncodeData3SrcInst(4, Rd, Rn, Rm, SP); } void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) { @@ -1022,9 +1022,9 @@ void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) { EncodeData3SrcInst(6, Rd, Rn, Rm, Ra); } -void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { - EncodeData3SrcInst(7, Rd, Rn, Rm, Ra); + EncodeData3SrcInst(7, Rd, Rn, Rm, SP); } void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index 7cc1d9036186..a7c65e413706 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -478,10 +478,10 @@ class ARM64XEmitter void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); - void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); - void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index a2d6c3026fe5..7da231d70ef3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -115,7 +115,6 @@ class Jit64 : public Jitx86Base void GenerateConstantOverflow(bool overflow); void GenerateConstantOverflow(s64 val); void GenerateOverflow(); - bool MergeAllowedNextInstructions(int count); void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarry(Gen::CCFlags cond); void FinalizeCarry(bool ca); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 57e3d110f16b..0131ef89ace7 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -50,22 +50,6 @@ void Jit64::GenerateOverflow() SetJumpTarget(exit); } -bool Jit64::MergeAllowedNextInstructions(int count) -{ - if (PowerPC::GetState() == PowerPC::CPU_STEPPING || js.instructionsLeft < count) - return false; - // Be careful: a breakpoint kills flags in between instructions - for (int i = 1; i <= count; i++) - { - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && - PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address)) - return false; - if (js.op[i].isBranchTarget) - return false; - } - return true; -} - void Jit64::FinalizeCarry(CCFlags cond) { js.carryFlagSet = false; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 543d186588d6..ff67956cde7c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -320,6 +320,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB BRK(0x666); } } + + i += js.skipInstructions; + js.skipInstructions = 0; } if (code_block.m_memory_exception) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index fab8eb724786..a372bb378d0c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -203,12 +203,80 @@ void JitArm64::mfspr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + int d = inst.RD; switch (iIndex) { + case SPR_TL: + case SPR_TU: + { + ARM64Reg WA = gpr.GetReg(); + ARM64Reg WB = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + ARM64Reg XB = EncodeRegTo64(WB); + + // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the + // cost of calling out to C for this is actually significant. + MOVI2R(XA, (u64)&CoreTiming::globalTimer); + LDR(INDEX_UNSIGNED, XA, XA, 0); + MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks); + LDR(INDEX_UNSIGNED, XB, XB, 0); + SUB(XA, XA, XB); + + // It might seem convenient to correct the timer for the block position here for even more accurate + // timing, but as of currently, this can break games. If we end up reading a time *after* the time + // at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only + // 50 downcount remaining, some games don't function correctly, such as Karaoke Party Revolution, + // which won't get past the loading screen. + // a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67 + ORR(XB, SP, 1, 60); + ADD(XB, XB, 1); + UMULH(XA, XA, XB); + + MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue); + LDR(INDEX_UNSIGNED, XB, XB, 0); + ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3)); + STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(spr[SPR_TL])); + + if (MergeAllowedNextInstructions(1)) + { + const UGeckoInstruction& next = js.op[1].inst; + // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them + // if we can. + u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F); + // Be careful; the actual opcode is for mftb (371), not mfspr (339) + int n = next.RD; + if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d) + { + js.downcountAmount++; + js.skipInstructions = 1; + gpr.BindToRegister(d, false); + gpr.BindToRegister(n, false); + if (iIndex == SPR_TL) + MOV(gpr.R(d), WA); + else + ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32)); + + if (nextIndex == SPR_TL) + MOV(gpr.R(n), WA); + else + ORR(EncodeRegTo64(gpr.R(n)), SP, XA, ArithOption(XA, ST_LSR, 32)); + + gpr.Unlock(WA, WB); + break; + } + } + gpr.BindToRegister(d, false); + if (iIndex == SPR_TU) + ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32)); + else + MOV(gpr.R(d), WA); + gpr.Unlock(WA, WB); + } + break; case SPR_XER: { - gpr.BindToRegister(inst.RD, false); - ARM64Reg RD = gpr.R(inst.RD); + gpr.BindToRegister(d, false); + ARM64Reg RD = gpr.R(d); ARM64Reg WA = gpr.GetReg(); LDRH(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(xer_stringctrl)); LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); @@ -220,12 +288,10 @@ void JitArm64::mfspr(UGeckoInstruction inst) break; case SPR_WPAR: case SPR_DEC: - case SPR_TL: - case SPR_TU: FALLBACK_IF(true); default: - gpr.BindToRegister(inst.RD, false); - ARM64Reg RD = gpr.R(inst.RD); + gpr.BindToRegister(d, false); + ARM64Reg RD = gpr.R(d); LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4); break; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index 097895ef850a..9df078874bbd 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -66,3 +66,19 @@ void LogGeneratedX86(int size, PPCAnalyst::CodeBuffer *code_buffer, const u8 *no DEBUG_LOG(DYNA_REC,"IR_X86 bin: %s\n\n\n", ss.str().c_str()); } } + +bool JitBase::MergeAllowedNextInstructions(int count) +{ + if (PowerPC::GetState() == PowerPC::CPU_STEPPING || js.instructionsLeft < count) + return false; + // Be careful: a breakpoint kills flags in between instructions + for (int i = 1; i <= count; i++) + { + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && + PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address)) + return false; + if (js.op[i].isBranchTarget) + return false; + } + return true; +} diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 3ff7d74158da..dcd869009a41 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -107,6 +107,8 @@ class JitBase : public CPUCoreBase PPCAnalyst::CodeBlock code_block; PPCAnalyst::PPCAnalyzer analyzer; + bool MergeAllowedNextInstructions(int count); + public: // This should probably be removed from public: JitOptions jo;