From 442a7012a6497e7632cfb617fe74c94b6f83e9e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?=
Date: Wed, 5 Jul 2017 17:00:15 +0200
Subject: [PATCH] Remove some rip addressing in vertex decoder

---
 GPU/Common/VertexDecoderX86.cpp | 60 ++++++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 12 deletions(-)

diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index 850aef0c4004..c07096f08d45 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -478,9 +478,17 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 	CVTDQ2PS(XMM8, R(XMM8));
 	if (dec_->nweights > 4)
 		CVTDQ2PS(XMM9, R(XMM9));
-	MULPS(XMM8, M(&by128));
-	if (dec_->nweights > 4)
-		MULPS(XMM9, M(&by128));
+
+	if (RipAccessible(&by128)) {
+		MULPS(XMM8, M(&by128));  // rip accessible
+		if (dec_->nweights > 4)
+			MULPS(XMM9, M(&by128));  // rip accessible
+	} else {
+		MOV(PTRBITS, R(tempReg1), ImmPtr(&by128));
+		MULPS(XMM8, MatR(tempReg1));
+		if (dec_->nweights > 4)
+			MULPS(XMM9, MatR(tempReg1));
+	}
 
 	auto weightToAllLanes = [this](X64Reg dst, int lane) {
 		X64Reg src = lane < 4 ? XMM8 : XMM9;
@@ -521,7 +529,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 #else
 		MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->weightoff + j));
 		CVTSI2SS(weight, R(tempReg1));
-		MULSS(weight, M(&by128));
+		MULSS(weight, M(&by128));  // rip accessible (x86)
 		SHUFPS(weight, R(weight), _MM_SHUFFLE(0, 0, 0, 0));
 #endif
 		if (j == 0) {
@@ -584,9 +592,17 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 	CVTDQ2PS(XMM8, R(XMM8));
 	if (dec_->nweights > 4)
 		CVTDQ2PS(XMM9, R(XMM9));
-	MULPS(XMM8, M(&by32768));
-	if (dec_->nweights > 4)
-		MULPS(XMM9, M(&by32768));
+
+	if (RipAccessible(&by32768)) {
+		MULPS(XMM8, M(&by32768));  // rip accessible
+		if (dec_->nweights > 4)
+			MULPS(XMM9, M(&by32768));  // rip accessible
+	} else {
+		MOV(PTRBITS, R(tempReg1), ImmPtr(&by32768));
+		MULPS(XMM8, MatR(tempReg1));
+		if (dec_->nweights > 4)
+			MULPS(XMM9, MatR(tempReg1));
+	}
 
 	auto weightToAllLanes = [this](X64Reg dst, int lane) {
 		X64Reg src = lane < 4 ? XMM8 : XMM9;
@@ -627,7 +643,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 #else
 		MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff + j * 2));
 		CVTSI2SS(weight, R(tempReg1));
-		MULSS(weight, M(&by32768));
+		MULSS(weight, M(&by32768));  // rip accessible (x86)
 		SHUFPS(weight, R(weight), _MM_SHUFFLE(0, 0, 0, 0));
 #endif
 		if (j == 0) {
@@ -1364,7 +1380,12 @@ void VertexDecoderJitCache::Jit_AnyS8ToFloat(int srcoff) {
 		PSRAD(XMM1, 24);
 	}
 	CVTDQ2PS(XMM3, R(XMM1));
-	MULPS(XMM3, M(&by128));
+	if (RipAccessible(&by128)) {
+		MULPS(XMM3, M(&by128));
+	} else {
+		MOV(PTRBITS, R(tempReg1), ImmPtr(&by128));
+		MULPS(XMM3, MatR(tempReg1));
+	}
 }
 
 void VertexDecoderJitCache::Jit_AnyS16ToFloat(int srcoff) {
@@ -1407,7 +1428,12 @@ void VertexDecoderJitCache::Jit_AnyU8ToFloat(int srcoff, u32 bits) {
 		PUNPCKLWD(XMM1, R(XMM3));
 	}
 	CVTDQ2PS(XMM3, R(XMM1));
-	MULPS(XMM3, M(&by128));
+	if (RipAccessible(&by128)) {
+		MULPS(XMM3, M(&by128));
+	} else {
+		MOV(PTRBITS, R(tempReg1), ImmPtr(&by128));
+		MULPS(XMM3, MatR(tempReg1));
+	}
 }
 
 void VertexDecoderJitCache::Jit_AnyU16ToFloat(int srcoff, u32 bits) {
@@ -1442,7 +1468,12 @@ void VertexDecoderJitCache::Jit_AnyS8Morph(int srcoff, int dstoff) {
 	if (!cpu_info.bSSE4_1) {
 		PXOR(fpScratchReg4, R(fpScratchReg4));
 	}
-	MOVAPS(XMM5, M(by128));
+	if (RipAccessible(&by128)) {
+		MOVAPS(XMM5, M(&by128));
+	} else {
+		MOV(PTRBITS, R(tempReg1), ImmPtr(&by128));
+		MOVAPS(XMM5, MatR(tempReg1));
+	}
 
 	// Sum into fpScratchReg.
 	bool first = true;
@@ -1481,7 +1512,12 @@ void VertexDecoderJitCache::Jit_AnyS16Morph(int srcoff, int dstoff) {
 	if (!cpu_info.bSSE4_1) {
 		PXOR(fpScratchReg4, R(fpScratchReg4));
 	}
-	MOVAPS(XMM5, M(by32768));
+	if (RipAccessible(&by32768)) {
+		MOVAPS(XMM5, M(&by32768));
+	} else {
+		MOV(PTRBITS, R(tempReg1), ImmPtr(&by32768));
+		MOVAPS(XMM5, MatR(tempReg1));
+	}
 
 	// Sum into fpScratchReg.
 	bool first = true;
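
Note on the pattern above (explanatory, not part of the patch): on x86-64 a
memory operand such as M(&by128) is encoded RIP-relative with a signed 32-bit
displacement, so it only works when the constant lies within roughly +/-2 GiB
of the generated code. When the JIT buffer ends up farther from the static
data, the patch instead loads the constant's address into tempReg1 with
ImmPtr() and reads it through a register-indirect operand (MatR). The sketch
below illustrates the kind of distance check RipAccessible() has to make; its
real definition is not part of this diff, and emitPos is a hypothetical
stand-in for the emitter's current write pointer.

	#include <cstddef>
	#include <cstdint>

	// Illustrative sketch only: true if target can be reached from code
	// emitted near emitPos using a signed 32-bit RIP-relative displacement.
	// (A real check would also account for the length of the instruction.)
	static bool RipAccessibleSketch(const uint8_t *emitPos, const void *target) {
		ptrdiff_t diff = (const uint8_t *)target - emitPos;
		return diff >= INT32_MIN && diff <= INT32_MAX;
	}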