From 8ffb0101fe8c401267ce43edef4eaa75b412fe53 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 16:37:49 -0800 Subject: [PATCH 1/6] jit: Report blocks with uneaten VFPU prefixes. There may be options to avoid, like continuing these blocks, especially if they're likely or something. --- Core/MIPS/ARM/ArmJit.cpp | 2 +- Core/MIPS/ARM64/Arm64Jit.cpp | 2 +- Core/MIPS/IR/IRFrontend.cpp | 4 ++-- Core/MIPS/IR/IRFrontend.h | 2 +- Core/MIPS/IR/IRJit.cpp | 2 +- Core/MIPS/x86/Jit.cpp | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 0a7c3ad538bf..050cef8025ca 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -218,7 +218,7 @@ void ArmJit::Compile(u32 em_address) { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); + WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); js.LogPrefix(); // Let's try that one more time. We won't get back here because we toggled the value. diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index 86b80a3a57b7..e1aaea334f84 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -204,7 +204,7 @@ void Arm64Jit::Compile(u32 em_address) { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); + WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); js.LogPrefix(); // Let's try that one more time. We won't get back here because we toggled the value. 
diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 17985ab8355b..7b8ab0f50dd8 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -105,7 +105,7 @@ void IRFrontend::CompileDelaySlot() { js.inDelaySlot = false; } -bool IRFrontend::CheckRounding() { +bool IRFrontend::CheckRounding(u32 blockAddress) { bool cleanSlate = false; if (js.hasSetRounding && !js.lastSetRounding) { WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); @@ -116,7 +116,7 @@ bool IRFrontend::CheckRounding() { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block"); + WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block for %08x", blockAddress); logBlocks = 1; js.LogPrefix(); diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h index b3fb8cdecf5a..e59cf3c70fb0 100644 --- a/Core/MIPS/IR/IRFrontend.h +++ b/Core/MIPS/IR/IRFrontend.h @@ -86,7 +86,7 @@ class IRFrontend : public MIPSFrontendInterface { int Replace_fabsf() override; void DoState(PointerWrap &p); - bool CheckRounding(); // returns true if we need a do-over + bool CheckRounding(u32 blockAddress); // returns true if we need a do-over void DoJit(u32 em_address, std::vector &instructions, std::vector &constants, u32 &mipsBytes); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index f92f4f54ab56..7110ab7ea2e9 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -74,7 +74,7 @@ void IRJit::Compile(u32 em_address) { // Overwrites the first instruction, and also updates stats. blocks_.FinalizeBlock(block_num); - if (frontend_.CheckRounding()) { + if (frontend_.CheckRounding(em_address)) { // Our assumptions are all wrong so it's clean-slate time. 
ClearCache(); Compile(em_address); diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index e220336b2a6a..48e098cc3ccb 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -286,7 +286,7 @@ void Jit::Compile(u32 em_address) { // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); + WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4); js.LogPrefix(); // Let's try that one more time. We won't get back here because we toggled the value. From d8d174fa2b34bb23bbe42eceee514d0decbcc89d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 16:39:11 -0800 Subject: [PATCH 2/6] arm64jit: Avoid spilling an extra reg for lwl/lwr. It's only needed for swl and swr. --- Core/MIPS/ARM64/Arm64CompLoadStore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp index a3f44e9e8927..9a226a32e6ae 100644 --- a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp +++ b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp @@ -180,7 +180,7 @@ namespace MIPSComp { gpr.SpillLock(rs); // Need to get temps before skipping safe mem. ARM64Reg LR_SCRATCH3 = gpr.GetAndLockTempR(); - ARM64Reg LR_SCRATCH4 = gpr.GetAndLockTempR(); + ARM64Reg LR_SCRATCH4 = o == 42 || o == 46 ? gpr.GetAndLockTempR() : INVALID_REG; if (!g_Config.bFastMemory && rs != MIPS_REG_SP) { skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2); From 905d2c2da6564a6976eac7c1c37a9ce6f0546ec1 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 16:41:57 -0800 Subject: [PATCH 3/6] irjit: Cleanup some invalid op handling. And log blocks the same way as other backends. 
--- Core/MIPS/IR/IRCompALU.cpp | 22 ++++++++++++---------- Core/MIPS/IR/IRCompFPU.cpp | 21 ++++++--------------- Core/MIPS/IR/IRCompLoadStore.cpp | 5 ++++- Core/MIPS/IR/IRCompVFPU.cpp | 2 +- Core/MIPS/IR/IRFrontend.cpp | 16 ++++++++-------- 5 files changed, 31 insertions(+), 35 deletions(-) diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index c7074bed794a..31dba9bfb955 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -40,14 +40,15 @@ // #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; #define DISABLE { Comp_Generic(op); return; } +#define INVALIDOP { Comp_Generic(op); return; } namespace MIPSComp { void IRFrontend::Comp_IType(MIPSOpcode op) { CONDITIONAL_DISABLE; - s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension - u32 uimm = op & 0xFFFF; + s32 simm = (s32)_IMM16; // sign extension + u32 uimm = (u16)_IMM16; u32 suimm = (u32)(s32)simm; MIPSGPReg rt = _RT; @@ -80,7 +81,7 @@ void IRFrontend::Comp_IType(MIPSOpcode op) { break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -104,7 +105,7 @@ void IRFrontend::Comp_RType2(MIPSOpcode op) { ir.Write(IROp::Clz, rd, IRTEMP_0); break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -176,7 +177,7 @@ void IRFrontend::Comp_RType3(MIPSOpcode op) { break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -213,8 +214,9 @@ void IRFrontend::Comp_ShiftType(MIPSOpcode op) { case 4: CompShiftVar(op, IROp::Shl, IROp::ShlImm); break; //sllv case 6: CompShiftVar(op, (sa == 1 ? IROp::Ror : IROp::Shr), (sa == 1 ? 
IROp::RorImm : IROp::ShrImm)); break; //srlv case 7: CompShiftVar(op, IROp::Sar, IROp::SarImm); break; //srav + default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -256,7 +258,7 @@ void IRFrontend::Comp_Special3(MIPSOpcode op) { break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -285,7 +287,7 @@ void IRFrontend::Comp_Allegrex(MIPSOpcode op) { break; default: - Comp_Generic(op); + INVALIDOP; return; } } @@ -307,7 +309,7 @@ void IRFrontend::Comp_Allegrex2(MIPSOpcode op) { ir.Write(IROp::BSwap32, rd, rt); break; default: - Comp_Generic(op); + INVALIDOP; break; } } @@ -372,7 +374,7 @@ void IRFrontend::Comp_MulDivType(MIPSOpcode op) { break; default: - DISABLE; + INVALIDOP; } } diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 96c39acc8d63..55fb03380452 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -51,6 +51,7 @@ // #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; #define DISABLE { Comp_Generic(op); return; } +#define INVALIDOP { Comp_Generic(op); return; } namespace MIPSComp { @@ -67,7 +68,7 @@ void IRFrontend::Comp_FPU3op(MIPSOpcode op) { case 2: ir.Write(IROp::FMul, fd, fs, ft); break; //F(fd) = F(fs) * F(ft); //mul case 3: ir.Write(IROp::FDiv, fd, fs, ft); break; //F(fd) = F(fs) / F(ft); //div default: - DISABLE; + INVALIDOP; return; } } @@ -90,7 +91,7 @@ void IRFrontend::Comp_FPULS(MIPSOpcode op) { break; default: - _dbg_assert_msg_(CPU, 0, "Trying to interpret FPULS instruction that can't be interpreted"); + INVALIDOP; break; } } @@ -131,7 +132,7 @@ void IRFrontend::Comp_FPUComp(MIPSOpcode op) { mode = IRFpCompareMode::LessEqualUnordered; break; default: - DISABLE; + INVALIDOP; return; } ir.Write(IROp::FCmp, (int)mode, fs, ft); @@ -158,27 +159,17 @@ void IRFrontend::Comp_FPU2op(MIPSOpcode op) { break; case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s - { ir.Write(IROp::FRound, fd, fs); break; - } - case 13: //FsI(fd) = Rto0(F(fs))); break; 
//trunc.w.s - { ir.Write(IROp::FTrunc, fd, fs); break; - } - case 14://FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s - { ir.Write(IROp::FCeil, fd, fs); break; - } case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s - { ir.Write(IROp::FFloor, fd, fs); break; - } case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w ir.Write(IROp::FCvtSW, fd, fs); @@ -189,7 +180,7 @@ void IRFrontend::Comp_FPU2op(MIPSOpcode op) { break; default: - DISABLE; + INVALIDOP; } } @@ -234,7 +225,7 @@ void IRFrontend::Comp_mxc1(MIPSOpcode op) { } return; default: - DISABLE; + INVALIDOP; break; } } diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index 50130fc5834d..d956e13d07c8 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -96,13 +96,16 @@ namespace MIPSComp { case 46: //swr DISABLE; break; + default: - Comp_Generic(op); + INVALIDOP; return; } } void IRFrontend::Comp_Cache(MIPSOpcode op) { + CONDITIONAL_DISABLE; + // int imm = (s16)(op & 0xFFFF); // int rs = _RS; // int addr = R(rs) + imm; diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index 5cc586a1982c..8638abae0b37 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1929,7 +1929,7 @@ namespace MIPSComp { ir.Write(IROp::FSub, tempregs[3], sregs[2], sregs[3]); } } else { - DISABLE; + INVALIDOP; } for (int i = 0; i < n; ++i) { diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 7b8ab0f50dd8..bdafffae9664 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -279,32 +279,32 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v if (logBlocks > 0 && dontLogBlocks == 0) { char temp2[256]; - ILOG("=============== mips %08x ===============", em_address); + NOTICE_LOG(JIT, "=============== mips %08x ===============", em_address); for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { temp2[0] = 0; MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), 
cpc, temp2, true); - ILOG("M: %08x %s", cpc, temp2); + NOTICE_LOG(JIT, "M: %08x %s", cpc, temp2); } } if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); + NOTICE_LOG(JIT, "=============== Original IR (%d instructions, %d const) ===============", (int)ir.GetInstructions().size(), (int)ir.GetConstants().size()); for (size_t i = 0; i < ir.GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], &ir.GetConstants()[0]); - ILOG("%s", buf); + NOTICE_LOG(JIT, "%s", buf); } - ILOG("=============== end ================="); + NOTICE_LOG(JIT, "=============== end ================="); } if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); + NOTICE_LOG(JIT, "=============== IR (%d instructions, %d const) ===============", (int)code->GetInstructions().size(), (int)code->GetConstants().size()); for (size_t i = 0; i < code->GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), code->GetInstructions()[i], &code->GetConstants()[0]); - ILOG("%s", buf); + NOTICE_LOG(JIT, "%s", buf); } - ILOG("=============== end ================="); + NOTICE_LOG(JIT, "=============== end ================="); } if (logBlocks > 0) From 671be241051d318a0ea18a892aaf9d9609106568 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 16:44:47 -0800 Subject: [PATCH 4/6] irjit: Add extra temps to make lwl/swl/etc. easier. 
--- Core/MIPS/IR/IRInst.cpp | 2 ++ Core/MIPS/IR/IRInst.h | 2 ++ Core/MIPS/IR/IRPassSimplify.cpp | 2 ++ 3 files changed, 6 insertions(+) diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 2c216bf9c536..655a736ceee0 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -207,6 +207,8 @@ const char *GetGPRName(int r) { switch (r) { case IRTEMP_0: return "irtemp0"; case IRTEMP_1: return "irtemp1"; + case IRTEMP_2: return "irtemp2"; + case IRTEMP_3: return "irtemp3"; case IRTEMP_LHS: return "irtemp_lhs"; case IRTEMP_RHS: return "irtemp_rhs"; default: return "(unk)"; diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 50d9a6707dfc..150654217798 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -275,6 +275,8 @@ enum IRFpCompareMode { enum { IRTEMP_0 = 192, IRTEMP_1, + IRTEMP_2, + IRTEMP_3, IRTEMP_LHS, // Reserved for use in branches IRTEMP_RHS, // Reserved for use in branches diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index aba843689b06..c8fdbafbbce2 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -662,6 +662,8 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out) { switch (dest) { case IRTEMP_0: case IRTEMP_1: + case IRTEMP_2: + case IRTEMP_3: case IRTEMP_LHS: case IRTEMP_RHS: // Unlike other ops, these don't need to persist between blocks. From b37ba9e5994657bf272ba6c50c758af1897e611f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 17:14:34 -0800 Subject: [PATCH 5/6] irjit: Add options for compile/optimize steps. This way the backend can set flags for the type of IR it wants. It seems too complex to combine certain things like lwl/lwr in a pass. 
--- Core/MIPS/IR/IRFrontend.cpp | 4 +--- Core/MIPS/IR/IRFrontend.h | 9 +++++++-- Core/MIPS/IR/IRInst.h | 4 ++++ Core/MIPS/IR/IRJit.cpp | 4 ++++ Core/MIPS/IR/IRPassSimplify.cpp | 32 +++++++++++++++----------------- Core/MIPS/IR/IRPassSimplify.h | 18 +++++++++--------- 6 files changed, 40 insertions(+), 31 deletions(-) diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index bdafffae9664..d3da613ca483 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -32,8 +32,6 @@ namespace MIPSComp { IRFrontend::IRFrontend(bool startDefaultPrefix) { - logBlocks = 0; - dontLogBlocks = 0; js.startDefaultPrefix = true; js.hasSetRounding = false; // js.currentRoundingFunc = convertS0ToSCRATCH1[0]; @@ -267,7 +265,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v // &MergeLoadStore, // &ThreeOpToTwoOp, }; - if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) + if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified, opts)) logBlocks = 1; code = &simplified; //if (ir.GetInstructions().size() >= 24) diff --git a/Core/MIPS/IR/IRFrontend.h b/Core/MIPS/IR/IRFrontend.h index e59cf3c70fb0..600177be2096 100644 --- a/Core/MIPS/IR/IRFrontend.h +++ b/Core/MIPS/IR/IRFrontend.h @@ -94,6 +94,10 @@ class IRFrontend : public MIPSFrontendInterface { js.EatPrefix(); } + void SetOptions(const IROptions &o) { + opts = o; + } + private: void RestoreRoundingMode(bool force = false); void ApplyRoundingMode(bool force = false); @@ -134,9 +138,10 @@ class IRFrontend : public MIPSFrontendInterface { // State JitState js; IRWriter ir; + IROptions opts{}; - int dontLogBlocks; - int logBlocks; + int dontLogBlocks = 0; + int logBlocks = 0; }; } // namespace diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 150654217798..a54e4cab9394 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -363,6 +363,10 @@ class IRWriter { std::vector constPool_; }; +struct IROptions { + bool unalignedLoadStore; +}; + 
const IRMeta *GetIRMeta(IROp op); void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool); void InitIR(); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 7110ab7ea2e9..84db03b150dd 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -41,6 +41,10 @@ IRJit::IRJit(MIPSState *mips) : frontend_(mips->HasDefaultPrefix()), mips_(mips) u32 size = 128 * 1024; // blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); InitIR(); + + IROptions opts{}; + opts.unalignedLoadStore = true; + frontend_.SetOptions(opts); } IRJit::~IRJit() { diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index c8fdbafbbce2..c8e728f45c37 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -110,9 +110,9 @@ IROp ShiftToShiftImm(IROp op) { } } -bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out) { +bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out, const IROptions &opts) { if (c == 1) { - return passes[0](in, out); + return passes[0](in, out, opts); } bool logBlocks = false; @@ -121,7 +121,7 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri const IRWriter *nextIn = ∈ IRWriter *nextOut = &temp[1]; for (size_t i = 0; i < c - 1; ++i) { - if (passes[i](*nextIn, *nextOut)) { + if (passes[i](*nextIn, *nextOut, opts)) { logBlocks = true; } @@ -129,14 +129,14 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri nextIn = &temp[0]; } - if (passes[c - 1](*nextIn, out)) { + if (passes[c - 1](*nextIn, out, opts)) { logBlocks = true; } return logBlocks; } -bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { +bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts) { const u32 *constants = !in.GetConstants().empty() ? 
&in.GetConstants()[0] : nullptr; bool logBlocks = false; IRInst prev; @@ -191,7 +191,7 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) { } // Might be useful later on x86. -bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out) { +bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out, const IROptions &opts) { bool logBlocks = false; for (int i = 0; i < (int)in.GetInstructions().size(); i++) { IRInst inst = in.GetInstructions()[i]; @@ -245,7 +245,7 @@ bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out) { return logBlocks; } -bool PropagateConstants(const IRWriter &in, IRWriter &out) { +bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts) { IRRegCache gpr(&out); const u32 *constants = !in.GetConstants().empty() ? &in.GetConstants()[0] : nullptr; @@ -619,7 +619,7 @@ int IRDestGPR(const IRInst &inst) { return -1; } -bool PurgeTemps(const IRWriter &in, IRWriter &out) { +bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts) { std::vector insts; insts.reserve(in.GetInstructions().size()); @@ -710,7 +710,7 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out) { return logBlocks; } -bool ReduceLoads(const IRWriter &in, IRWriter &out) { +bool ReduceLoads(const IRWriter &in, IRWriter &out, const IROptions &opts) { for (u32 value : in.GetConstants()) { out.AddConstant(value); } @@ -846,7 +846,7 @@ static std::vector ReorderLoadStoreOps(std::vector &ops, const u return ops; } -bool ReorderLoadStore(const IRWriter &in, IRWriter &out) { +bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts) { bool logBlocks = false; enum class RegState : u8 { @@ -1042,7 +1042,7 @@ bool ReorderLoadStore(const IRWriter &in, IRWriter &out) { return logBlocks; } -bool MergeLoadStore(const IRWriter &in, IRWriter &out) { +bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts) { bool logBlocks = false; auto opsCompatible = [&](const IRInst &a, const IRInst &b, int dist) { @@ -1076,16 +1076,15 
@@ bool MergeLoadStore(const IRWriter &in, IRWriter &out) { break; } } - // Warning: this may generate unaligned stores. - if (c == 2 || c == 3) { + if ((c == 2 || c == 3) && opts.unalignedLoadStore) { inst.op = IROp::Store16; out.Write(inst); prev = inst; - // Skip the next one. + // Skip the next one (the 3rd will be separate.) ++i; continue; } - if (c == 4) { + if (c == 4 && opts.unalignedLoadStore) { inst.op = IROp::Store32; out.Write(inst); prev = inst; @@ -1108,8 +1107,7 @@ bool MergeLoadStore(const IRWriter &in, IRWriter &out) { break; } } - // Warning: this may generate unaligned stores. - if (c == 2) { + if (c == 2 && opts.unalignedLoadStore) { inst.op = IROp::Store32; out.Write(inst); prev = inst; diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h index aeb2cff238d7..118b44128f51 100644 --- a/Core/MIPS/IR/IRPassSimplify.h +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -2,14 +2,14 @@ #include "Core/MIPS/IR/IRInst.h" -typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out); -bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out); +typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out, const IROptions &opts); // Block optimizer passes of varying usefulness. 
-bool PropagateConstants(const IRWriter &in, IRWriter &out); -bool PurgeTemps(const IRWriter &in, IRWriter &out); -bool ReduceLoads(const IRWriter &in, IRWriter &out); -bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out); -bool OptimizeFPMoves(const IRWriter &in, IRWriter &out); -bool ReorderLoadStore(const IRWriter &in, IRWriter &out); -bool MergeLoadStore(const IRWriter &in, IRWriter &out); +bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool ReduceLoads(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts); +bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts); From 3abcc4d6d8442ca3a2b2e7a1751586c4a67b1eee Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 31 Dec 2017 17:21:41 -0800 Subject: [PATCH 6/6] irjit: Implement lwl/lwr/swl/swr. This is very similar to the arm64jit implementation. 
--- Core/MIPS/IR/IRCompLoadStore.cpp | 117 +++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index d956e13d07c8..31beef109d82 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -41,16 +41,124 @@ // #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; #define DISABLE { Comp_Generic(op); return; } +#define INVALIDOP { Comp_Generic(op); return; } namespace MIPSComp { void IRFrontend::Comp_ITypeMemLR(MIPSOpcode op, bool load) { - DISABLE; + CONDITIONAL_DISABLE; + + int offset = _IMM16; + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + int o = op >> 26; + + if (!js.inDelaySlot && opts.unalignedLoadStore) { + // Optimisation: Combine to single unaligned load/store. + const bool isLeft = (o == 34 || o == 42); + MIPSOpcode nextOp = GetOffsetInstruction(1); + // Find a matching shifted load/store in opposite direction with opposite offset. + if (nextOp == (isLeft ? (op.encoding + (4 << 26) - 3) : (op.encoding - (4 << 26) + 3))) { + EatInstruction(nextOp); + + if (isLeft) { + // Get the unaligned base offset from the lwr/swr instruction. + offset = (signed short)(nextOp & 0xFFFF); + // Already checked it if we're on the lwr. 
+ CheckMemoryBreakpoint(rs, offset); + } + + if (load) { + ir.Write(IROp::Load32, rt, rs, ir.AddConstant(offset)); + } else { + ir.Write(IROp::Store32, rt, rs, ir.AddConstant(offset)); + } + return; + } + } + + int addrReg = IRTEMP_0; + int valueReg = IRTEMP_1; + int maskReg = IRTEMP_2; + int shiftReg = IRTEMP_3; + + // addrReg = rs + imm + ir.Write(IROp::AddConst, addrReg, rs, ir.AddConstant(offset)); + // shiftReg = (addr & 3) * 8 + ir.Write(IROp::AndConst, shiftReg, addrReg, ir.AddConstant(3)); + ir.Write(IROp::ShlImm, shiftReg, shiftReg, 3); + // addrReg = addr & 0xfffffffc (for stores, later) + ir.Write(IROp::AndConst, addrReg, addrReg, ir.AddConstant(0xFFFFFFFC)); + // valueReg = RAM(addrReg) + ir.Write(IROp::Load32, valueReg, addrReg, ir.AddConstant(0)); + + switch (o) { + case 34: //lwl + // rt &= (0x00ffffff >> shift) + // Alternatively, could shift to a wall and back (but would require two shifts each way.) + ir.WriteSetConstant(maskReg, 0x00ffffff); + ir.Write(IROp::Shr, maskReg, maskReg, shiftReg); + ir.Write(IROp::And, rt, rt, maskReg); + // valueReg <<= (24 - shift) + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + ir.Write(IROp::Shl, valueReg, valueReg, shiftReg); + // rt |= valueReg + ir.Write(IROp::Or, rt, rt, valueReg); + break; + case 38: //lwr + // valueReg >>= shift + ir.Write(IROp::Shr, valueReg, valueReg, shiftReg); + // shiftReg = 24 - shift + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + // rt &= (0xffffff00 << (24 - shift)) + // Alternatively, could shift to a wall and back (but would require two shifts each way.) 
+ ir.WriteSetConstant(maskReg, 0xffffff00); + ir.Write(IROp::Shl, maskReg, maskReg, shiftReg); + ir.Write(IROp::And, rt, rt, maskReg); + // rt |= valueReg + ir.Write(IROp::Or, rt, rt, valueReg); + break; + case 42: //swl + // valueReg &= 0xffffff00 << shift + ir.WriteSetConstant(maskReg, 0xffffff00); + ir.Write(IROp::Shl, maskReg, maskReg, shiftReg); + ir.Write(IROp::And, valueReg, valueReg, maskReg); + // shiftReg = 24 - shift + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + // valueReg |= rt >> (24 - shift) + ir.Write(IROp::Shr, maskReg, rt, shiftReg); + ir.Write(IROp::Or, valueReg, valueReg, maskReg); + break; + case 46: //swr + // valueReg &= 0x00ffffff << (24 - shift) + ir.WriteSetConstant(maskReg, 0x00ffffff); + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + ir.Write(IROp::Shl, maskReg, maskReg, shiftReg); + ir.Write(IROp::And, valueReg, valueReg, maskReg); + ir.Write(IROp::Neg, shiftReg, shiftReg); + ir.Write(IROp::AddConst, shiftReg, shiftReg, ir.AddConstant(24)); + // valueReg |= rt << shift + ir.Write(IROp::Shl, maskReg, rt, shiftReg); + ir.Write(IROp::Or, valueReg, valueReg, maskReg); + break; + default: + INVALIDOP; + return; + } + + if (!load) { + // RAM(addrReg) = valueReg + ir.Write(IROp::Store32, valueReg, addrReg, ir.AddConstant(0)); + } } void IRFrontend::Comp_ITypeMem(MIPSOpcode op) { CONDITIONAL_DISABLE; - int offset = (signed short)(op & 0xFFFF); + int offset = _IMM16; MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; int o = op >> 26; @@ -61,7 +169,6 @@ namespace MIPSComp { CheckMemoryBreakpoint(rs, offset); - int addrReg = IRTEMP_0; switch (o) { // Load case 35: @@ -92,9 +199,11 @@ namespace MIPSComp { case 34: //lwl case 38: //lwr + Comp_ITypeMemLR(op, true); + break; case 42: //swl case 46: //swr - DISABLE; + Comp_ITypeMemLR(op, false); break; default: