Skip to content

Commit

Permalink
x86jit: Handle vmin/vmax and vsge correctly.
Browse files Browse the repository at this point in the history
Unfortunately, this boots vmin/vmax from simd, currently.
  • Loading branch information
unknownbrackets committed Mar 29, 2015
1 parent 67d9233 commit bdd1db1
Showing 1 changed file with 73 additions and 11 deletions.
84 changes: 73 additions & 11 deletions Core/MIPS/x86/CompVFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,13 +935,40 @@ void Jit::Comp_Vcmov(MIPSOpcode op) {
fpr.ReleaseSpillLocks();
}

static s32 MEMORY_ALIGNED16(vminmax_sreg[4]);

static s32 DoVminSS(s32 treg) {
s32 sreg = vminmax_sreg[0];

// If both are negative, we flip the comparison (not two's compliment.)
if (sreg < 0 && treg < 0) {
// If at least one side is NAN, we take the highest mantissa bits.
return treg < sreg ? sreg : treg;
} else {
// Otherwise, we take the lowest value (negative or lowest mantissa.)
return treg > sreg ? sreg : treg;
}
}

static s32 DoVmaxSS(s32 treg) {
s32 sreg = vminmax_sreg[0];

// This is the same logic as vmin, just reversed.
if (sreg < 0 && treg < 0) {
return treg < sreg ? treg : sreg;
} else {
return treg > sreg ? treg : sreg;
}
}

void Jit::Comp_VecDo3(MIPSOpcode op) {
CONDITIONAL_DISABLE;

if (js.HasUnknownPrefix())
DISABLE;

// Check that we can support the ops, and prepare temporary values for ops that need it.
bool allowSIMD = true;
switch (op >> 26) {
case 24: //VFPU0
switch ((op >> 23) & 7) {
Expand All @@ -965,6 +992,7 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
switch ((op >> 23) & 7) {
case 2: // vmin
case 3: // vmax
allowSIMD = false;
break;
case 6: // vsge
case 7: // vslt
Expand All @@ -986,7 +1014,7 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
GetVectorRegsPrefixT(tregs, sz, _VT);
GetVectorRegsPrefixD(dregs, sz, _VD);

if (fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, tregs, sz)) {
if (allowSIMD && fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, tregs, sz)) {
void (XEmitter::*opFunc)(X64Reg, OpArg) = nullptr;
bool symmetric = false;
switch (op >> 26) {
Expand Down Expand Up @@ -1017,21 +1045,24 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
switch ((op >> 23) & 7)
{
case 2: // vmin
// TODO: Mishandles NaN.
// TODO: Mishandles NaN. Disabled for now.
MOVAPS(XMM1, fpr.VS(sregs));
MINPS(XMM1, fpr.VS(tregs));
MOVAPS(fpr.VSX(dregs), R(XMM1));
break;
case 3: // vmax
// TODO: Mishandles NaN.
// TODO: Mishandles NaN. Disabled for now.
MOVAPS(XMM1, fpr.VS(sregs));
MAXPS(XMM1, fpr.VS(tregs));
MOVAPS(fpr.VSX(dregs), R(XMM1));
break;
case 6: // vsge
// TODO: Mishandles NaN.
MOVAPS(XMM0, fpr.VS(tregs));
MOVAPS(XMM1, fpr.VS(sregs));
CMPPS(XMM0, R(XMM1), CMP_ORD);
CMPPS(XMM1, fpr.VS(tregs), CMP_NLT);

ANDPS(XMM1, R(XMM0));
ANDPS(XMM1, M(&oneOneOneOne));
MOVAPS(fpr.VSX(dregs), R(XMM1));
break;
Expand Down Expand Up @@ -1077,7 +1108,8 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs, n, tregs))
{
// On 32-bit we only have 6 xregs for mips regs, use XMM0/XMM1 if possible.
if (i < 2)
// But for vmin/vmax/vsge, we need XMM0/XMM1, so avoid.
if (i < 2 && (op >> 26) != 27)
tempxregs[i] = (X64Reg) (XMM0 + i);
else
{
Expand Down Expand Up @@ -1128,16 +1160,46 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
switch ((op >> 23) & 7)
{
case 2: // vmin
// TODO: Mishandles NaN.
MINSS(tempxregs[i], fpr.V(tregs[i]));
{
MOVSS(XMM0, fpr.V(tregs[i]));
UCOMISS(tempxregs[i], R(XMM0));
FixupBranch skip = J_CC(CC_NP, true);

MOVSS(M(&vminmax_sreg), tempxregs[i]);
MOVD_xmm(R(EAX), XMM0);
CallProtectedFunction(&DoVminSS, R(EAX));
MOVD_xmm(tempxregs[i], R(EAX));
FixupBranch finish = J();

SetJumpTarget(skip);
MINSS(tempxregs[i], R(XMM0));
SetJumpTarget(finish);
}
break;
case 3: // vmax
// TODO: Mishandles NaN.
MAXSS(tempxregs[i], fpr.V(tregs[i]));
{
MOVSS(XMM0, fpr.V(tregs[i]));
UCOMISS(tempxregs[i], R(XMM0));
FixupBranch skip = J_CC(CC_NP, true);

MOVSS(M(&vminmax_sreg), tempxregs[i]);
MOVD_xmm(R(EAX), XMM0);
CallProtectedFunction(&DoVmaxSS, R(EAX));
MOVD_xmm(tempxregs[i], R(EAX));
FixupBranch finish = J();

SetJumpTarget(skip);
MAXSS(tempxregs[i], R(XMM0));
SetJumpTarget(finish);
}
break;
case 6: // vsge
// TODO: Mishandles NaN.
CMPNLTSS(tempxregs[i], fpr.V(tregs[i]));
// We can't just reverse, because of 0/-0.
MOVSS(XMM0, fpr.V(tregs[i]));
MOVSS(XMM1, R(tempxregs[i]));
CMPORDSS(XMM1, R(XMM0));
CMPNLTSS(tempxregs[i], R(XMM0));
ANDPS(tempxregs[i], R(XMM1));
ANDPS(tempxregs[i], M(&oneOneOneOne));
break;
case 7: // vslt
Expand Down

0 comments on commit bdd1db1

Please sign in to comment.