From 8ffb0101fe8c401267ce43edef4eaa75b412fe53 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 16:37:49 -0800 Subject: [PATCH 1/6] jit: Report blocks with uneaten VFPU prefixes. There may be options to avoid, like continuing these blocks, especially if they're likely or something. --- Core/MIPS/ARM/ArmJit.cpp | 2 +- Core/MIPS/ARM64/Arm64Jit.cpp | 2 +- Core/MIPS/IR/IRFrontend.cpp | 4 ++-- Core/MIPS/IR/IRFrontend.h | 2 +- Core/MIPS/IR/IRJit.cpp | 2 +- Core/MIPS/x86/Jit.cpp | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 0a7c3ad538bf..050cef8025ca 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -218,7 +218,7 @@ void ArmJit::Compile(u32 em_address) { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); + WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); js.LogPrefix(); // Let's try that one more time. We won't get back here because we toggled the value. diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index 86b80a3a57b7..e1aaea334f84 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -204,7 +204,7 @@ void Arm64Jit::Compile(u32 em_address) { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); + WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); js.LogPrefix(); // Let's try that one more time. We won't get back here because we toggled the value. 
diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 17985ab8355b..7b8ab0f50dd8 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -105,7 +105,7 @@ void IRFrontend::CompileDelaySlot() { js.inDelaySlot = false; } -bool IRFrontend::CheckRounding() { +bool IRFrontend::CheckRounding(u32 blockAddress) { bool cleanSlate = false; if (js.hasSetRounding && !js.lastSetRounding) { WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); @@ -116,7 +116,7 @@ bool IRFrontend::CheckRounding() { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block"); + WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block for %08x", blockAddress); logBlocks = 1; js.LogPrefix(); diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h index b3fb8cdecf5a..e59cf3c70fb0 100644 --- a/Core/MIPS/IR/IRFrontend.h +++ b/Core/MIPS/IR/IRFrontend.h @@ -86,7 +86,7 @@ class IRFrontend : public MIPSFrontendInterface { int Replace_fabsf() override; void DoState(PointerWrap &p); - bool CheckRounding(); // returns true if we need a do-over + bool CheckRounding(u32 blockAddress); // returns true if we need a do-over void DoJit(u32 em_address, std::vector &instructions, std::vector &constants, u32 &mipsBytes); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index f92f4f54ab56..7110ab7ea2e9 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -74,7 +74,7 @@ void IRJit::Compile(u32 em_address) { // Overwrites the first instruction, and also updates stats. blocks_.FinalizeBlock(block_num); - if (frontend_.CheckRounding()) { + if (frontend_.CheckRounding(em_address)) { // Our assumptions are all wrong so it's clean-slate time. 
ClearCache(); Compile(em_address); diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index e220336b2a6a..48e098cc3ccb 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -286,7 +286,7 @@ void Jit::Compile(u32 em_address) { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); + WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); js.LogPrefix(); // Let's try that one more time. We won't get back here because we toggled the value. From d8d174fa2b34bb23bbe42eceee514d0decbcc89d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 16:39:11 -0800 Subject: [PATCH 2/6] arm64jit: Avoid spilling an extra reg for lwl/lwr. It's only needed for swl and swr. --- Core/MIPS/ARM64/Arm64CompLoadStore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp index a3f44e9e8927..9a226a32e6ae 100644 --- a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp +++ b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp @@ -180,7 +180,7 @@ namespace MIPSComp { gpr.SpillLock(rs); // Need to get temps before skipping safe mem. ARM64Reg LR_SCRATCH3 = gpr.GetAndLockTempR(); - ARM64Reg LR_SCRATCH4 = gpr.GetAndLockTempR(); + ARM64Reg LR_SCRATCH4 = o == 42 || o == 46 ? gpr.GetAndLockTempR() : INVALID_REG; if (!g_Config.bFastMemory && rs != MIPS_REG_SP) { skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2); From 905d2c2da6564a6976eac7c1c37a9ce6f0546ec1 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 16:41:57 -0800 Subject: [PATCH 3/6] irjit: Cleanup some invalid op handling. And log blocks the same way as other backends. 
--- Core/MIPS/IR/IRCompALU.cpp | 22 ++++++++++++---------- Core/MIPS/IR/IRCompFPU.cpp | 21 ++++++--------------- Core/MIPS/IR/IRCompLoadStore.cpp | 5 ++++- Core/MIPS/IR/IRCompVFPU.cpp | 2 +- Core/MIPS/IR/IRFrontend.cpp | 16 ++++++++-------- 5 files changed, 31 insertions(+), 35 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index c7074bed794a..31dba9bfb955 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -40,14 +40,15 @@ // #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; #define DISABLE { Comp_Generic(op); return; } +#define INVALIDOP { Comp_Generic(op); return; } namespace MIPSComp { void IRFrontend::Comp_IType(MIPSOpcode op) { CONDITIONAL_DISABLE; - s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension - u32 uimm = op & 0xFFFF; + s32 simm = (s32)_IMM16; // sign extension + u32 uimm = (u16)_IMM16; u32 suimm = (u32)(s32)simm; MIPSGPReg rt = _RT; @@ -80,7 +81,7 @@ void IRFrontend::Comp_IType(MIPSOpcode op) { break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -104,7 +105,7 @@ void IRFrontend::Comp_RType2(MIPSOpcode op) { ir.Write(IROp::Clz, rd, IRTEMP_0); break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -176,7 +177,7 @@ void IRFrontend::Comp_RType3(MIPSOpcode op) { break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -213,8 +214,9 @@ void IRFrontend::Comp_ShiftType(MIPSOpcode op) { case 4: CompShiftVar(op, IROp::Shl, IROp::ShlImm); break; //sllv case 6: CompShiftVar(op, (sa == 1 ? IROp::Ror : IROp::Shr), (sa == 1 ? 
IROp::RorImm : IROp::ShrImm)); break; //srlv case 7: CompShiftVar(op, IROp::Sar, IROp::SarImm); break; //srav + default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -256,7 +258,7 @@ void IRFrontend::Comp_Special3(MIPSOpcode op) { break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -285,7 +287,7 @@ void IRFrontend::Comp_Allegrex(MIPSOpcode op) { break; default: - Comp_Generic(op); + INVALIDOP; return; } } @@ -307,7 +309,7 @@ void IRFrontend::Comp_Allegrex2(MIPSOpcode op) { ir.Write(IROp::BSwap32, rd, rt); break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -372,7 +374,7 @@ void IRFrontend::Comp_MulDivType(MIPSOpcode op) { break; default: - DISABLE; + INVALIDOP; } } diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 96c39acc8d63..55fb03380452 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -51,6 +51,7 @@ // #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; #define DISABLE { Comp_Generic(op); return; } +#define INVALIDOP { Comp_Generic(op); return; } namespace MIPSComp { @@ -67,7 +68,7 @@ void IRFrontend::Comp_FPU3op(MIPSOpcode op) { case 2: ir.Write(IROp::FMul, fd, fs, ft); break; //F(fd) = F(fs) * F(ft); //mul case 3: ir.Write(IROp::FDiv, fd, fs, ft); break; //F(fd) = F(fs) / F(ft); //div default: - DISABLE; + INVALIDOP; return; } } @@ -90,7 +91,7 @@ void IRFrontend::Comp_FPULS(MIPSOpcode op) { break; default: - _dbg_assert_msg_(CPU, 0, "Trying to interpret FPULS instruction that can't be interpreted"); + INVALIDOP; break; } } @@ -131,7 +132,7 @@ void IRFrontend::Comp_FPUComp(MIPSOpcode op) { mode = IRFpCompareMode::LessEqualUnordered; break; default: - DISABLE; + INVALIDOP; return; } ir.Write(IROp::FCmp, (int)mode, fs, ft); @@ -158,27 +159,17 @@ void IRFrontend::Comp_FPU2op(MIPSOpcode op) { break; case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s - { ir.Write(IROp::FRound, fd, fs); break; - } - case 13: //FsI(fd) = Rto0(F(fs))); break; 
//trunc.w.s - { ir.Write(IROp::FTrunc, fd, fs); break; - } - case 14://FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s - { ir.Write(IROp::FCeil, fd, fs); break; - } case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s - { ir.Write(IROp::FFloor, fd, fs); break; - } case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w ir.Write(IROp::FCvtSW, fd, fs); @@ -189,7 +180,7 @@ void IRFrontend::Comp_FPU2op(MIPSOpcode op) { break; default: - DISABLE; + INVALIDOP; } } @@ -234,7 +225,7 @@ void IRFrontend::Comp_mxc1(MIPSOpcode op) { } return; default: - DISABLE; + INVALIDOP; break; } } diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index 50130fc5834d..d956e13d07c8 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -96,13 +96,16 @@ namespace MIPSComp { case 46: //swr DISABLE; break; + default: - Comp_Generic(op); + INVALIDOP; return; } } void IRFrontend::Comp_Cache(MIPSOpcode op) { + CONDITIONAL_DISABLE; + // int imm = (s16)(op & 0xFFFF); // int rs = _RS; // int addr = R(rs) + imm; diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 5cc586a1982c..8638abae0b37 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1929,7 +1929,7 @@ namespace MIPSComp { ir.Write(IROp::FSub, tempregs[3], sregs[2], sregs[3]); } } else { - DISABLE; + INVALIDOP; } for (int i = 0; i < n; ++i) { diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 7b8ab0f50dd8..bdafffae9664 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -279,32 +279,32 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v if (logBlocks > 0 && dontLogBlocks == 0) { char temp2[256]; - ILOG("=============== mips %08x ===============", em_address); + NOTICE_LOG(JIT, "=============== mips %08x ===============", em_address); for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { temp2[0] = 0; MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), 
cpc, temp2, true); - ILOG("M: %08x %s", cpc, temp2); + NOTICE_LOG(JIT, "M: %08x %s", cpc, temp2); } } if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); + NOTICE_LOG(JIT, "=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); for (size_t i = 0; i < ir.GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], &ir.GetConstants()[0]); - ILOG("%s", buf); + NOTICE_LOG(JIT, "%s", buf); } - ILOG("=============== end ================="); + NOTICE_LOG(JIT, "=============== end ================="); } if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); + NOTICE_LOG(JIT, "=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); for (size_t i = 0; i < code->GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], &code->GetConstants()[0]); - ILOG("%s", buf); + NOTICE_LOG(JIT, "%s", buf); } - ILOG("=============== end ================="); + NOTICE_LOG(JIT, "=============== end ================="); } if (logBlocks > 0) From 671be241051d318a0ea18a892aaf9d9609106568 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 16:44:47 -0800 Subject: [PATCH 4/6] irjit: Add extra temps to make lwl/swl/etc. easier. 
--- Core/MIPS/IR/IRInst.cpp | 2 ++ Core/MIPS/IR/IRInst.h | 2 ++ Core/MIPS/IR/IRPassSimplify.cpp | 2 ++ 3 files changed, 6 insertions(+) diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 2c216bf9c536..655a736ceee0 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -207,6 +207,8 @@ const char *GetGPRName(int r) { switch (r) { case IRTEMP_0: return "irtemp0"; case IRTEMP_1: return "irtemp1"; + case IRTEMP_2: return "irtemp2"; + case IRTEMP_3: return "irtemp3"; case IRTEMP_LHS: return "irtemp_lhs"; case IRTEMP_RHS: return "irtemp_rhs"; default: return "(unk)"; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 50d9a6707dfc..150654217798 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -275,6 +275,8 @@ enum IRFpCompareMode { enum { IRTEMP_0 = 192, IRTEMP_1, + IRTEMP_2, + IRTEMP_3, IRTEMP_LHS, // Reserved for use in branches IRTEMP_RHS, // Reserved for use in branches diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index aba843689b06..c8fdbafbbce2 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -662,6 +662,8 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out) { switch (dest) { case IRTEMP_0: case IRTEMP_1: + case IRTEMP_2: + case IRTEMP_3: case IRTEMP_LHS: case IRTEMP_RHS: // Unlike other ops, these don't need to persist between blocks. From b37ba9e5994657bf272ba6c50c758af1897e611f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 17:14:34 -0800 Subject: [PATCH 5/6] irjit: Add options for compile/optimize steps. This way the backend can set flags for the type of IR it wants. It seems too complex to combine certain things like lwl/lwr in a pass. 
--- Core/MIPS/IR/IRFrontend.cpp | 4 +--- Core/MIPS/IR/IRFrontend.h | 9 +++++++-- Core/MIPS/IR/IRInst.h | 4 ++++ Core/MIPS/IR/IRJit.cpp | 4 ++++ Core/MIPS/IR/IRPassSimplify.cpp | 32 +++++++++++++++----------------- Core/MIPS/IR/IRPassSimplify.h | 18 +++++++++--------- 6 files changed, 40 insertions(+), 31 deletions(-) diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index bdafffae9664..d3da613ca483 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -32,8 +32,6 @@ namespace MIPSComp { IRFrontend::IRFrontend(bool startDefaultPrefix) { - logBlocks = 0; - dontLogBlocks = 0; js.startDefaultPrefix = true; js.hasSetRounding = false; // js.currentRoundingFunc = convertS0ToSCRATCH1[0]; @@ -267,7 +265,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v // &MergeLoadStore, // &ThreeOpToTwoOp, }; - if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) + if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified, opts)) logBlocks = 1; code = &simplified; //if (ir.GetInstructions().size() >= 24) diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h index e59cf3c70fb0..600177be2096 100644 --- a/Core/MIPS/IR/IRFrontend.h +++ b/Core/MIPS/IR/IRFrontend.h @@ -94,6 +94,10 @@ class IRFrontend : public MIPSFrontendInterface { js.EatPrefix(); } + void SetOptions(const IROptions &o) { + opts = o; + } + private: void RestoreRoundingMode(bool force = false); void ApplyRoundingMode(bool force = false); @@ -134,9 +138,10 @@ class IRFrontend : public MIPSFrontendInterface { // State JitState js; IRWriter ir; + IROptions opts{}; - int dontLogBlocks; - int logBlocks; + int dontLogBlocks = 0; + int logBlocks = 0; }; } // namespace diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 150654217798..a54e4cab9394 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -363,6 +363,10 @@ class IRWriter { std::vector constPool_; }; +struct IROptions { + bool unalignedLoadStore; +}; + 
const IRMeta *GetIRMeta(IROp op); void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool); void InitIR(); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 7110ab7ea2e9..84db03b150dd 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -41,6 +41,10 @@ IRJit::IRJit(MIPSState *mips) : frontend_(mips->HasDefaultPrefix()), mips_(mips) u32 size = 128 * 1024; // blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); InitIR(); + + IROptions opts{}; + opts.unalignedLoadStore = true; + frontend_.SetOptions(opts); } IRJit::~IRJit() { diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index c8fdbafbbce2..c8e728f45c37 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -110,9 +110,9 @@ IROp ShiftToShiftImm(IROp op) { } } -bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out) { +bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out, const IROptions &opts) { if (c == 1) { - return passes[0](in, out); + return passes[0](in, out, opts); } bool logBlocks = false; @@ -121,7 +121,7 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri const IRWriter *nextIn = ∈ IRWriter *nextOut = &temp[1]; for (size_t i = 0; i < c - 1; ++i) { - if (passes[i](*nextIn, *nextOut)) { + if (passes[i](*nextIn, *nextOut, opts)) { logBlocks = true; } @@ -129,14 +129,14 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri nextIn = &temp[0]; } - if (passes[c - 1](*nextIn, out)) { + if (passes[c - 1](*nextIn, out, opts)) { logBlocks = true; } return logBlocks; } -bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { +bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts) { const u32 *constants = !in.GetConstants().empty() ? 
&in.GetConstants()[0] : nullptr; bool logBlocks = false; IRInst prev; @@ -191,7 +191,7 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { } // Might be useful later on x86. -bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out) { +bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out, const IROptions &opts) { bool logBlocks = false; for (int i = 0; i < (int)in.GetInstructions().size(); i++) { IRInst inst = in.GetInstructions()[i]; @@ -245,7 +245,7 @@ bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out) { return logBlocks; } -bool PropagateConstants(const IRWriter &in, IRWriter &out) { +bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts) { IRRegCache gpr(&out); const u32 *constants = !in.GetConstants().empty() ? &in.GetConstants()[0] : nullptr; @@ -619,7 +619,7 @@ int IRDestGPR(const IRInst &inst) { return -1; } -bool PurgeTemps(const IRWriter &in, IRWriter &out) { +bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts) { std::vector insts; insts.reserve(in.GetInstructions().size()); @@ -710,7 +710,7 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out) { return logBlocks; } -bool ReduceLoads(const IRWriter &in, IRWriter &out) { +bool ReduceLoads(const IRWriter &in, IRWriter &out, const IROptions &opts) { for (u32 value : in.GetConstants()) { out.AddConstant(value); } @@ -846,7 +846,7 @@ static std::vector ReorderLoadStoreOps(std::vector &ops, const u return ops; } -bool ReorderLoadStore(const IRWriter &in, IRWriter &out) { +bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts) { bool logBlocks = false; enum class RegState : u8 { @@ -1042,7 +1042,7 @@ bool ReorderLoadStore(const IRWriter &in, IRWriter &out) { return logBlocks; } -bool MergeLoadStore(const IRWriter &in, IRWriter &out) { +bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts) { bool logBlocks = false; auto opsCompatible = [&](const IRInst &a, const IRInst &b, int dist) { @@ -1076,16 +1076,15 
@@ bool MergeLoadStore(const IRWriter &in, IRWriter &out) { break; } } - // Warning: this may generate unaligned stores. - if (c == 2 || c == 3) { + if ((c == 2 || c == 3) && opts.unalignedLoadStore) { inst.op = IROp::Store16; out.Write(inst); prev = inst; - // Skip the next one. + // Skip the next one (the 3rd will be separate.) ++i; continue; } - if (c == 4) { + if (c == 4 && opts.unalignedLoadStore) { inst.op = IROp::Store32; out.Write(inst); prev = inst; @@ -1108,8 +1107,7 @@ bool MergeLoadStore(const IRWriter &in, IRWriter &out) { break; } } - // Warning: this may generate unaligned stores. - if (c == 2) { + if (c == 2 && opts.unalignedLoadStore) { inst.op = IROp::Store32; out.Write(inst); prev = inst; diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index aeb2cff238d7..118b44128f51 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -2,14 +2,14 @@ #include "Core/MIPS/IR/IRInst.h" -typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out); -bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out); +typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out, const IROptions &opts); // Block optimizer passes of varying usefulness. 
-bool PropagateConstants(const IRWriter &in, IRWriter &out); -bool PurgeTemps(const IRWriter &in, IRWriter &out); -bool ReduceLoads(const IRWriter &in, IRWriter &out); -bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out); -bool OptimizeFPMoves(const IRWriter &in, IRWriter &out); -bool ReorderLoadStore(const IRWriter &in, IRWriter &out); -bool MergeLoadStore(const IRWriter &in, IRWriter &out); +bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool ReduceLoads(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts); From 3abcc4d6d8442ca3a2b2e7a1751586c4a67b1eee Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 17:21:41 -0800 Subject: [PATCH 6/6] irjit: Implement lwl/lwr/swl/swr. This is very similar to the arm64jit implementation. 
--- Core/MIPS/IR/IRCompLoadStore.cpp | 117 +++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index d956e13d07c8..31beef109d82 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -41,16 +41,124 @@ // #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; #define DISABLE { Comp_Generic(op); return; } +#define INVALIDOP { Comp_Generic(op); return; } namespace MIPSComp { void IRFrontend::Comp_ITypeMemLR(MIPSOpcode op, bool load) { - DISABLE; + CONDITIONAL_DISABLE; + + int offset = _IMM16; + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + int o = op >> 26; + + if (!js.inDelaySlot && opts.unalignedLoadStore) { + // Optimisation: Combine to single unaligned load/store. + const bool isLeft = (o == 34 || o == 42); + MIPSOpcode nextOp = GetOffsetInstruction(1); + // Find a matching shifted load/store in opposite direction with opposite offset. + if (nextOp == (isLeft ? (op.encoding + (4 << 26) - 3) : (op.encoding - (4 << 26) + 3))) { + EatInstruction(nextOp); + + if (isLeft) { + // Get the unaligned base offset from the lwr/swr instruction. + offset = (signed short)(nextOp & 0xFFFF); + // Already checked it if we're on the lwr. 
+ CheckMemoryBreakpoint(rs, offset); + } + + if (load) { + ir.Write(IROp::Load32, rt, rs, ir.AddConstant(offset)); + } else { + ir.Write(IROp::Store32, rt, rs, ir.AddConstant(offset)); + } + return; + } + } + + int addrReg = IRTEMP_0; + int valueReg = IRTEMP_1; + int maskReg = IRTEMP_2; + int shiftReg = IRTEMP_3; + + // addrReg = rs + imm + ir.Write(IROp::AddConst, addrReg, rs, ir.AddConstant(offset)); + // shiftReg = (addr & 3) * 8 + ir.Write(IROp::AndConst, shiftReg, addrReg, ir.AddConstant(3)); + ir.Write(IROp::ShlImm, shiftReg, shiftReg, 3); + // addrReg = addr & 0xfffffffc (for stores, later) + ir.Write(IROp::AndConst, addrReg, addrReg, ir.AddConstant(0xFFFFFFFC)); + // valueReg = RAM(addrReg) + ir.Write(IROp::Load32, valueReg, addrReg, ir.AddConstant(0)); + + switch (o) { + case 34: //lwl + // rt &= (0x00ffffff >> shift) + // Alternatively, could shift to a wall and back (but would require two shifts each way.) + ir.WriteSetConstant(maskReg, 0x00ffffff); + ir.Write(IROp::Shr, maskReg, maskReg, shiftReg); + ir.Write(IROp::And, rt, rt, maskReg); + // valueReg <<= (24 - shift) + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + ir.Write(IROp::Shl, valueReg, valueReg, shiftReg); + // rt |= valueReg + ir.Write(IROp::Or, rt, rt, valueReg); + break; + case 38: //lwr + // valueReg >>= shift + ir.Write(IROp::Shr, valueReg, valueReg, shiftReg); + // shiftReg = 24 - shift + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + // rt &= (0xffffff00 << (24 - shift)) + // Alternatively, could shift to a wall and back (but would require two shifts each way.) 
+ ir.WriteSetConstant(maskReg, 0xffffff00); + ir.Write(IROp::Shl, maskReg, maskReg, shiftReg); + ir.Write(IROp::And, rt, rt, maskReg); + // rt |= valueReg + ir.Write(IROp::Or, rt, rt, valueReg); + break; + case 42: //swl + // valueReg &= 0xffffff00 << shift + ir.WriteSetConstant(maskReg, 0xffffff00); + ir.Write(IROp::Shl, maskReg, maskReg, shiftReg); + ir.Write(IROp::And, valueReg, valueReg, maskReg); + // shiftReg = 24 - shift + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + // valueReg |= rt >> (24 - shift) + ir.Write(IROp::Shr, maskReg, rt, shiftReg); + ir.Write(IROp::Or, valueReg, valueReg, maskReg); + break; + case 46: //swr + // valueReg &= 0x00ffffff << (24 - shift) + ir.WriteSetConstant(maskReg, 0x00ffffff); + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + ir.Write(IROp::Shl, maskReg, maskReg, shiftReg); + ir.Write(IROp::And, valueReg, valueReg, maskReg); + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + // valueReg |= rt << shift + ir.Write(IROp::Shl, maskReg, rt, shiftReg); + ir.Write(IROp::Or, valueReg, valueReg, maskReg); + break; + default: + INVALIDOP; + return; + } + + if (!load) { + // RAM(addrReg) = valueReg + ir.Write(IROp::Store32, valueReg, addrReg, ir.AddConstant(0)); + } } void IRFrontend::Comp_ITypeMem(MIPSOpcode op) { CONDITIONAL_DISABLE; - int offset = (signed short)(op & 0xFFFF); + int offset = _IMM16; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; int o = op >> 26; @@ -61,7 +169,6 @@ namespace MIPSComp { CheckMemoryBreakpoint(rs, offset); - int addrReg = IRTEMP_0; switch (o) { // Load case 35: @@ -92,9 +199,11 @@ namespace MIPSComp { case 34: //lwl case 38: //lwr + Comp_ITypeMemLR(op, true); + break; case 42: //swl case 46: //swr - DISABLE; + Comp_ITypeMemLR(op, false); break; default: