Permalink
Browse files

Merge pull request #843 from FioraAeterna/fprf

JIT: Initial FPRF support
  • Loading branch information...
Sonicadvance1 committed Aug 28, 2014
2 parents 1cf77c7 + 7dbc623 commit 0217fb200805c2c10fbb8f395d88d86137731c6d
@@ -383,6 +383,9 @@ union UReg_MSR
UReg_MSR() { Hex = 0; }
};
// Position and mask of the FPRF field inside the 32-bit FPSCR value:
// a five-bit field whose least significant bit sits at bit 12.
#define FPRF_SHIFT 12
#define FPRF_MASK (((1 << 5) - 1) << FPRF_SHIFT)
// Floating Point Status and Control Register
union UReg_FPSCR
{
@@ -97,14 +97,14 @@ static GekkoOPTemplate primarytable[] =
static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, Interpreter::ps_cmpu0, {"ps_cmpu0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU, 1, 0, 0, 0}},
{32, Interpreter::ps_cmpo0, {"ps_cmpo0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU, 1, 0, 0, 0}},
{0, Interpreter::ps_cmpu0, {"ps_cmpu0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{32, Interpreter::ps_cmpo0, {"ps_cmpo0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{40, Interpreter::ps_neg, {"ps_neg", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
{136, Interpreter::ps_nabs, {"ps_nabs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
{264, Interpreter::ps_abs, {"ps_abs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
{64, Interpreter::ps_cmpu1, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
{64, Interpreter::ps_cmpu1, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{72, Interpreter::ps_mr, {"ps_mr", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
{96, Interpreter::ps_cmpo1, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
{96, Interpreter::ps_cmpo1, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{528, Interpreter::ps_merge00, {"ps_merge00", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
{560, Interpreter::ps_merge01, {"ps_merge01", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
{592, Interpreter::ps_merge10, {"ps_merge10", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
@@ -115,23 +115,23 @@ static GekkoOPTemplate table4[] =
static GekkoOPTemplate table4_2[] =
{
{10, Interpreter::ps_sum0, {"ps_sum0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{11, Interpreter::ps_sum1, {"ps_sum1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{12, Interpreter::ps_muls0, {"ps_muls0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{13, Interpreter::ps_muls1, {"ps_muls1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{14, Interpreter::ps_madds0, {"ps_madds0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{15, Interpreter::ps_madds1, {"ps_madds1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{18, Interpreter::ps_div, {"ps_div", OPTYPE_PS, FL_USE_FPU, 17, 0, 0, 0}},
{20, Interpreter::ps_sub, {"ps_sub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{21, Interpreter::ps_add, {"ps_add", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{10, Interpreter::ps_sum0, {"ps_sum0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{11, Interpreter::ps_sum1, {"ps_sum1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{12, Interpreter::ps_muls0, {"ps_muls0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{13, Interpreter::ps_muls1, {"ps_muls1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{14, Interpreter::ps_madds0, {"ps_madds0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{15, Interpreter::ps_madds1, {"ps_madds1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{18, Interpreter::ps_div, {"ps_div", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}},
{20, Interpreter::ps_sub, {"ps_sub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{21, Interpreter::ps_add, {"ps_add", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{23, Interpreter::ps_sel, {"ps_sel", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{24, Interpreter::ps_res, {"ps_res", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{25, Interpreter::ps_mul, {"ps_mul", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{26, Interpreter::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, FL_USE_FPU, 2, 0, 0, 0}},
{28, Interpreter::ps_msub, {"ps_msub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{29, Interpreter::ps_madd, {"ps_madd", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{30, Interpreter::ps_nmsub, {"ps_nmsub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{31, Interpreter::ps_nmadd, {"ps_nmadd", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
{24, Interpreter::ps_res, {"ps_res", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{25, Interpreter::ps_mul, {"ps_mul", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{26, Interpreter::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}},
{28, Interpreter::ps_msub, {"ps_msub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{29, Interpreter::ps_madd, {"ps_madd", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{30, Interpreter::ps_nmsub, {"ps_nmsub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{31, Interpreter::ps_nmadd, {"ps_nmadd", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
};
@@ -307,51 +307,51 @@ static GekkoOPTemplate table31_2[] =
static GekkoOPTemplate table59[] =
{
{18, Interpreter::fdivsx, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 17, 0, 0, 0}}, // TODO
{20, Interpreter::fsubsx, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{21, Interpreter::faddsx, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
//{22, Interpreter::fsqrtsx, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, // Not implemented on gekko
{24, Interpreter::fresx, {"fresx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{25, Interpreter::fmulsx, {"fmulsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{28, Interpreter::fmsubsx, {"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{29, Interpreter::fmaddsx, {"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{30, Interpreter::fnmsubsx, {"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{31, Interpreter::fnmaddsx, {"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{18, Interpreter::fdivsx, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, // TODO
{20, Interpreter::fsubsx, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{21, Interpreter::faddsx, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
//{22, Interpreter::fsqrtsx, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, // Not implemented on gekko
{24, Interpreter::fresx, {"fresx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{25, Interpreter::fmulsx, {"fmulsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{28, Interpreter::fmsubsx, {"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{29, Interpreter::fmaddsx, {"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{30, Interpreter::fnmsubsx, {"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{31, Interpreter::fnmaddsx, {"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
};
static GekkoOPTemplate table63[] =
{
{264, Interpreter::fabsx, {"fabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{32, Interpreter::fcmpo, {"fcmpo", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{0, Interpreter::fcmpu, {"fcmpu", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{32, Interpreter::fcmpo, {"fcmpo", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{0, Interpreter::fcmpu, {"fcmpu", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{14, Interpreter::fctiwx, {"fctiwx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{15, Interpreter::fctiwzx, {"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{72, Interpreter::fmrx, {"fmrx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{136, Interpreter::fnabsx, {"fnabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{40, Interpreter::fnegx, {"fnegx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{12, Interpreter::frspx, {"frspx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{64, Interpreter::mcrfs, {"mcrfs", OPTYPE_SYSTEMFP, FL_USE_FPU, 1, 0, 0, 0}},
{583, Interpreter::mffsx, {"mffsx", OPTYPE_SYSTEMFP, FL_USE_FPU, 1, 0, 0, 0}},
{70, Interpreter::mtfsb0x, {"mtfsb0x", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
{38, Interpreter::mtfsb1x, {"mtfsb1x", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
{134, Interpreter::mtfsfix, {"mtfsfix", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
{711, Interpreter::mtfsfx, {"mtfsfx", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
{12, Interpreter::frspx, {"frspx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{64, Interpreter::mcrfs, {"mcrfs", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
{583, Interpreter::mffsx, {"mffsx", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
{70, Interpreter::mtfsb0x, {"mtfsb0x", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
{38, Interpreter::mtfsb1x, {"mtfsb1x", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
{134, Interpreter::mtfsfix, {"mtfsfix", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
{711, Interpreter::mtfsfx, {"mtfsfx", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
};
static GekkoOPTemplate table63_2[] =
{
{18, Interpreter::fdivx, {"fdivx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 31, 0, 0, 0}},
{20, Interpreter::fsubx, {"fsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{21, Interpreter::faddx, {"faddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{22, Interpreter::fsqrtx, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{18, Interpreter::fdivx, {"fdivx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}},
{20, Interpreter::fsubx, {"fsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{21, Interpreter::faddx, {"faddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{22, Interpreter::fsqrtx, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{23, Interpreter::fselx, {"fselx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{25, Interpreter::fmulx, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{26, Interpreter::frsqrtex, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{28, Interpreter::fmsubx, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{29, Interpreter::fmaddx, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{30, Interpreter::fnmsubx, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{31, Interpreter::fnmaddx, {"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
{25, Interpreter::fmulx, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{26, Interpreter::frsqrtex, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{28, Interpreter::fmsubx, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{29, Interpreter::fmaddx, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{30, Interpreter::fnmsubx, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
{31, Interpreter::fnmaddx, {"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
};
namespace InterpreterTables
{
@@ -116,11 +116,12 @@ class Jit64 : public Jitx86Base
// Generates a branch that will check if a given bit of a CR register part
// is set or not.
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
void SetFPRFIfNeeded(UGeckoInstruction inst, Gen::X64Reg xmm);
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS = false);
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
typedef u32 (*Operation)(u32 a, u32 b);
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS = false);
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
// OPCODES
void unknown_instruction(UGeckoInstruction _inst);
@@ -14,7 +14,7 @@ static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x800000
static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS)
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS)
{
fpr.Lock(d, a, b);
if (roundRHS)
@@ -88,25 +88,35 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (X
UNPCKLPD(fpr.RX(d), R(fpr.RX(d)));
}
}
SetFPRFIfNeeded(inst, fpr.RX(d));
fpr.UnlockAll();
}
// FPRF is reset by every float operation, so a value that a later instruction is going to
// clobber before anything reads it never has to be calculated. This helper emits the FPRF
// update for the result in xmm only when both the global option and the per-op flag ask
// for it. inst is currently not read by this function.
void Jit64::SetFPRFIfNeeded(UGeckoInstruction inst, X64Reg xmm)
{
	// FPRF emulation is disabled in the startup parameters: emit nothing.
	if (!Core::g_CoreStartupParameter.bEnableFPRF)
		return;

	// The op being compiled is not marked as wanting FPRF: emit nothing.
	if (!js.op->wantsFPRF)
		return;

	// As far as we know, the games that use this flag only need FPRF for fmul and fmadd,
	// but FPRF is fast enough in JIT that we might as well just enable it for every float
	// instruction whenever the enableFPRF flag is set.
	SetFPRF(xmm);
}
void Jit64::fp_arith(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
// Only the interpreter has "proper" support for (some) FP flags
FALLBACK_IF(inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF);
bool single = inst.OPCD == 59;
switch (inst.SUBOP5)
{
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD); break; //add
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, single); break; //mul
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD, inst); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD, inst); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD, inst); break; //add
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, inst, single); break; //mul
default:
_assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
}
@@ -118,9 +128,6 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
// Only the interpreter has "proper" support for (some) FP flags
FALLBACK_IF(inst.SUBOP5 == 29 && Core::g_CoreStartupParameter.bEnableFPRF);
bool single_precision = inst.OPCD == 59;
int a = inst.FA;
@@ -165,9 +172,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
{
MOVSD(fpr.RX(d), R(XMM0));
}
// SMB checks flags after this op. Let's lie.
//AND(32, M(&PowerPC::ppcState.fpscr), Imm32(~((0x80000000 >> 19) | (0x80000000 >> 15))));
//OR(32, M(&PowerPC::ppcState.fpscr), Imm32((0x80000000 >> 16)));
SetFPRFIfNeeded(inst, fpr.RX(d));
fpr.UnlockAll();
}
@@ -241,6 +246,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
fpr.Lock(a,b);
fpr.BindToRegister(b, true);
AND(32, M(&FPSCR), Imm32(~FPRF_MASK));
// Are we masking sNaN invalid floating point exceptions? If not, this could crash unless we handle the exception.
UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
@@ -264,21 +270,26 @@ void Jit64::fcmpx(UGeckoInstruction inst)
}
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ)));
OR(32, M(&FPSCR), Imm32(CR_EQ << FPRF_SHIFT));
continue1 = J();
SetJumpTarget(pNaN);
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO)));
OR(32, M(&FPSCR), Imm32(CR_SO << FPRF_SHIFT));
if (a != b)
{
continue2 = J();
SetJumpTarget(pGreater);
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT)));
OR(32, M(&FPSCR), Imm32(CR_GT << FPRF_SHIFT));
continue3 = J();
SetJumpTarget(pLesser);
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT)));
OR(32, M(&FPSCR), Imm32(CR_LT << FPRF_SHIFT));
}
SetJumpTarget(continue1);
@@ -113,7 +113,7 @@ add a,b,a
*/
//There's still a little bit more optimization that can be squeezed out of this
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg), bool roundRHS)
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg), UGeckoInstruction inst, bool roundRHS)
{
fpr.Lock(d, a, b);
@@ -163,6 +163,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X6
(this->*op)(fpr.RX(d), fpr.R(b));
}
ForceSinglePrecisionP(fpr.RX(d));
SetFPRFIfNeeded(inst, fpr.RX(d));
fpr.UnlockAll();
}
@@ -175,16 +176,16 @@ void Jit64::ps_arith(UGeckoInstruction inst)
switch (inst.SUBOP5)
{
case 18: // div
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD);
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD, inst);
break;
case 20: // sub
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD);
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD, inst);
break;
case 21: // add
tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD);
tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD, inst);
break;
case 25: // mul
tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD, true);
tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD, inst, true);
break;
default:
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
@@ -228,6 +229,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
PanicAlert("ps_sum WTF!!!");
}
ForceSinglePrecisionP(fpr.RX(d));
SetFPRFIfNeeded(inst, fpr.RX(d));
fpr.UnlockAll();
}
@@ -267,6 +269,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
PanicAlert("ps_muls WTF!!!");
}
ForceSinglePrecisionP(fpr.RX(d));
SetFPRFIfNeeded(inst, fpr.RX(d));
fpr.UnlockAll();
}
@@ -372,5 +375,6 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), Gen::R(XMM0));
ForceSinglePrecisionP(fpr.RX(d));
SetFPRFIfNeeded(inst, fpr.RX(d));
fpr.UnlockAll();
}
@@ -284,15 +284,6 @@ void Jit64::mfcr(UGeckoInstruction inst)
gpr.UnlockAllX();
}
// convert flags into 64-bit CR values with a lookup table
static const u64 m_crTable[16] =
{
PPCCRToInternal(0x0), PPCCRToInternal(0x1), PPCCRToInternal(0x2), PPCCRToInternal(0x3),
PPCCRToInternal(0x4), PPCCRToInternal(0x5), PPCCRToInternal(0x6), PPCCRToInternal(0x7),
PPCCRToInternal(0x8), PPCCRToInternal(0x9), PPCCRToInternal(0xA), PPCCRToInternal(0xB),
PPCCRToInternal(0xC), PPCCRToInternal(0xD), PPCCRToInternal(0xE), PPCCRToInternal(0xF),
};
void Jit64::mtcrf(UGeckoInstruction inst)
{
INSTRUCTION_START
Oops, something went wrong.

0 comments on commit 0217fb2

Please sign in to comment.