x86jit: Handle vmin/vmax and vsge correctly.

Unfortunately, this boots vmin/vmax from simd, currently.
hrydgard · Mar 29, 2015 · bdd1db1 · bdd1db1
1 parent 67d9233
commit bdd1db1
Showing 1 changed file with 73 additions and 11 deletions.
diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp
@@ -935,13 +935,40 @@ void Jit::Comp_Vcmov(MIPSOpcode op) {
 	fpr.ReleaseSpillLocks();
 }
 
+static s32 MEMORY_ALIGNED16(vminmax_sreg[4]);
+
+static s32 DoVminSS(s32 treg) {
+	s32 sreg = vminmax_sreg[0];
+
+	// If both are negative, we flip the comparison (not two's compliment.)
+	if (sreg < 0 && treg < 0) {
+		// If at least one side is NAN, we take the highest mantissa bits.
+		return treg < sreg ? sreg : treg;
+	} else {
+		// Otherwise, we take the lowest value (negative or lowest mantissa.)
+		return treg > sreg ? sreg : treg;
+	}
+}
+
+static s32 DoVmaxSS(s32 treg) {
+	s32 sreg = vminmax_sreg[0];
+
+	// This is the same logic as vmin, just reversed.
+	if (sreg < 0 && treg < 0) {
+		return treg < sreg ? treg : sreg;
+	} else {
+		return treg > sreg ? treg : sreg;
+	}
+}
+
 void Jit::Comp_VecDo3(MIPSOpcode op) {
 	CONDITIONAL_DISABLE;
 
 	if (js.HasUnknownPrefix())
 		DISABLE;
 
 	// Check that we can support the ops, and prepare temporary values for ops that need it.
+	bool allowSIMD = true;
 	switch (op >> 26) {
 	case 24: //VFPU0
 		switch ((op >> 23) & 7) {
@@ -965,6 +992,7 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
 		switch ((op >> 23) & 7) {
 		case 2:  // vmin
 		case 3:  // vmax
+			allowSIMD = false;
 			break;
 		case 6:  // vsge
 		case 7:  // vslt
@@ -986,7 +1014,7 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
 	GetVectorRegsPrefixT(tregs, sz, _VT);
 	GetVectorRegsPrefixD(dregs, sz, _VD);
 
-	if (fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, tregs, sz)) {
+	if (allowSIMD && fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, tregs, sz)) {
 		void (XEmitter::*opFunc)(X64Reg, OpArg) = nullptr;
 		bool symmetric = false;
 		switch (op >> 26) {
@@ -1017,21 +1045,24 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
 			switch ((op >> 23) & 7)
 			{
 			case 2:  // vmin
-				// TODO: Mishandles NaN.
+				// TODO: Mishandles NaN.  Disabled for now.
 				MOVAPS(XMM1, fpr.VS(sregs));
 				MINPS(XMM1, fpr.VS(tregs));
 				MOVAPS(fpr.VSX(dregs), R(XMM1));
 				break;
 			case 3:  // vmax
-				// TODO: Mishandles NaN.
+				// TODO: Mishandles NaN.  Disabled for now.
 				MOVAPS(XMM1, fpr.VS(sregs));
 				MAXPS(XMM1, fpr.VS(tregs));
 				MOVAPS(fpr.VSX(dregs), R(XMM1));
 				break;
 			case 6:  // vsge
-				// TODO: Mishandles NaN.
+				MOVAPS(XMM0, fpr.VS(tregs));
 				MOVAPS(XMM1, fpr.VS(sregs));
+				CMPPS(XMM0, R(XMM1), CMP_ORD);
 				CMPPS(XMM1, fpr.VS(tregs), CMP_NLT);
+
+				ANDPS(XMM1, R(XMM0));
 				ANDPS(XMM1, M(&oneOneOneOne));
 				MOVAPS(fpr.VSX(dregs), R(XMM1));
 				break;
@@ -1077,7 +1108,8 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
 		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs, n, tregs))
 		{
 			// On 32-bit we only have 6 xregs for mips regs, use XMM0/XMM1 if possible.
-			if (i < 2)
+			// But for vmin/vmax/vsge, we need XMM0/XMM1, so avoid.
+			if (i < 2 && (op >> 26) != 27)
 				tempxregs[i] = (X64Reg) (XMM0 + i);
 			else
 			{
@@ -1128,16 +1160,46 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
 			switch ((op >> 23) & 7)
 			{
 			case 2:  // vmin
-				// TODO: Mishandles NaN.
-				MINSS(tempxregs[i], fpr.V(tregs[i]));
+				{
+					MOVSS(XMM0, fpr.V(tregs[i]));
+					UCOMISS(tempxregs[i], R(XMM0));
+					FixupBranch skip = J_CC(CC_NP, true);
+
+					MOVSS(M(&vminmax_sreg), tempxregs[i]);
+					MOVD_xmm(R(EAX), XMM0);
+					CallProtectedFunction(&DoVminSS, R(EAX));
+					MOVD_xmm(tempxregs[i], R(EAX));
+					FixupBranch finish = J();
+
+					SetJumpTarget(skip);
+					MINSS(tempxregs[i], R(XMM0));
+					SetJumpTarget(finish);
+				}
 				break;
 			case 3:  // vmax
-				// TODO: Mishandles NaN.
-				MAXSS(tempxregs[i], fpr.V(tregs[i]));
+				{
+					MOVSS(XMM0, fpr.V(tregs[i]));
+					UCOMISS(tempxregs[i], R(XMM0));
+					FixupBranch skip = J_CC(CC_NP, true);
+
+					MOVSS(M(&vminmax_sreg), tempxregs[i]);
+					MOVD_xmm(R(EAX), XMM0);
+					CallProtectedFunction(&DoVmaxSS, R(EAX));
+					MOVD_xmm(tempxregs[i], R(EAX));
+					FixupBranch finish = J();
+
+					SetJumpTarget(skip);
+					MAXSS(tempxregs[i], R(XMM0));
+					SetJumpTarget(finish);
+				}
 				break;
 			case 6:  // vsge
-				// TODO: Mishandles NaN.
-				CMPNLTSS(tempxregs[i], fpr.V(tregs[i]));
+				// We can't just reverse, because of 0/-0.
+				MOVSS(XMM0, fpr.V(tregs[i]));
+				MOVSS(XMM1, R(tempxregs[i]));
+				CMPORDSS(XMM1, R(XMM0));
+				CMPNLTSS(tempxregs[i], R(XMM0));
+				ANDPS(tempxregs[i], R(XMM1));
 				ANDPS(tempxregs[i], M(&oneOneOneOne));
 				break;
 			case 7:  // vslt