Skip to content
Permalink
Browse files
Merge pull request #9637 from JosJuice/jitarm64-fprf
JitArm64: Implement FPRF updates
  • Loading branch information
lioncash committed May 13, 2021
2 parents bf16f77 + 25dc059 commit 725ea3d
Show file tree
Hide file tree
Showing 10 changed files with 273 additions and 34 deletions.
@@ -3039,28 +3039,31 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top)
{
EmitScalar1Source(0, 0, IsDouble(Rd), 0, Rd, Rn);
}
else if (IsGPR(Rd) != IsGPR(Rn))
{
const ARM64Reg gpr = IsGPR(Rn) ? Rn : Rd;
const ARM64Reg fpr = IsGPR(Rn) ? Rd : Rn;

const int sf = Is64Bit(gpr) ? 1 : 0;
const int type = Is64Bit(gpr) ? (top ? 2 : 1) : 0;
const int rmode = top ? 1 : 0;
const int opcode = IsGPR(Rn) ? 7 : 6;

ASSERT_MSG(DYNA_REC, !top || IsQuad(fpr), "FMOV: top can only be used with quads");

// TODO: Should this check be more lenient? Sometimes you do want to do things like
// read the lower 32 bits of a double
ASSERT_MSG(DYNA_REC,
(!Is64Bit(gpr) && IsSingle(fpr)) ||
(Is64Bit(gpr) && ((IsDouble(fpr) && !top) || (IsQuad(fpr) && top))),
"FMOV: Mismatched sizes");

Write32((sf << 31) | (0x1e << 24) | (type << 22) | (1 << 21) | (rmode << 19) | (opcode << 16) |
(DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
else
{
ASSERT_MSG(DYNA_REC, !IsQuad(Rd) && !IsQuad(Rn), "FMOV can't move to/from quads");
int rmode = 0;
int opcode = 6;
int sf = 0;
if (IsSingle(Rd) && !Is64Bit(Rn) && !top)
{
// GPR to scalar single
opcode |= 1;
}
else if (!Is64Bit(Rd) && IsSingle(Rn) && !top)
{
// Scalar single to GPR - defaults are correct
}
else
{
// TODO
ASSERT_MSG(DYNA_REC, 0, "FMOV: Unhandled case");
}
Write32((sf << 31) | (0x1e2 << 20) | (rmode << 19) | (opcode << 16) | (DecodeReg(Rn) << 5) |
DecodeReg(Rd));
ASSERT_MSG(DYNA_REC, 0, "FMOV: Unsupported case");
}
}

@@ -399,6 +399,7 @@ union UReg_MSR
};

// Position of the FPRF field inside the 32-bit FPSCR value: bit index of the field's lowest bit.
#define FPRF_SHIFT 12
// Number of bits in the FPRF field.
#define FPRF_WIDTH 5
// Mask selecting the FPRF bits in place. Derived from FPRF_WIDTH and FPRF_SHIFT so the three
// macros cannot drift apart; evaluates to 0x1F000, and stays a plain int like the old definition.
#define FPRF_MASK (((1 << FPRF_WIDTH) - 1) << FPRF_SHIFT)

// FPSCR exception flags
@@ -234,6 +234,7 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
void GenerateCommonAsm();
void GenerateConvertDoubleToSingle();
void GenerateConvertSingleToDouble();
void GenerateFPRF(bool single);
void GenerateQuantizedLoadStores();

// Profiling
@@ -262,6 +263,8 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
Arm64Gen::ARM64Reg),
bool Rc = false);

void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg);

// <Fastmem fault location, slowmem handler location>
std::map<const u8*, FastmemArea> m_fault_to_handler;
std::map<SlowmemHandler, const u8*> m_handler_to_loc;
@@ -9,19 +9,33 @@
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/PowerPC.h"

using namespace Arm64Gen;

// Emits a call to the shared fprf_single / fprf_double assembly routine for the value in `reg`.
// Nothing is emitted unless FPRF emulation is enabled in the config (bFPRF) and the instruction
// being compiled is flagged as wanting FPRF (js.op->wantsFPRF).
//
// single: true  -> `reg` is viewed as a single, its bits are moved to W0, fprf_single is called.
//         false -> `reg` is viewed as a double, its bits are moved to X0, fprf_double is called.
// reg:    host floating-point register holding the value to hand to the routine.
void JitArm64::SetFPRFIfNeeded(bool single, ARM64Reg reg)
{
  const bool fprf_wanted = SConfig::GetInstance().bFPRF && js.op->wantsFPRF;
  if (!fprf_wanted)
    return;

  // W0 carries the argument and BL overwrites the link register (W30).
  // NOTE(review): W1-W4 are presumably scratch registers used by the fprf routines - confirm
  // against the routine generator.
  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);

  if (single)
  {
    // Raw 32-bit move from the FP register into W0, then call the single-precision routine.
    m_float_emit.FMOV(ARM64Reg::W0, EncodeRegToSingle(reg));
    BL(GetAsmRoutines()->fprf_single);
  }
  else
  {
    // Raw 64-bit move from the FP register into X0, then call the double-precision routine.
    m_float_emit.FMOV(ARM64Reg::X0, EncodeRegToDouble(reg));
    BL(GetAsmRoutines()->fprf_double);
  }

  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
}

void JitArm64::fp_arith(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);

u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
u32 op5 = inst.SUBOP5;
@@ -120,13 +134,17 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
}
}

if (single || packed)
const bool outputs_are_singles = single || packed;

if (outputs_are_singles)
{
ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
"Register allocation turned singles into doubles in the middle of fp_arith");

fpr.FixSinglePrecision(d);
}

SetFPRFIfNeeded(outputs_are_singles, VD);
}

void JitArm64::fp_logic(UGeckoInstruction inst)
@@ -252,7 +270,6 @@ void JitArm64::frspx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);

const u32 b = inst.FB;
const u32 d = inst.FD;
@@ -269,21 +286,26 @@ void JitArm64::frspx(UGeckoInstruction inst)

ASSERT_MSG(DYNA_REC, fpr.IsSingle(b, true),
"Register allocation turned singles into doubles in the middle of frspx");

SetFPRFIfNeeded(true, VD);
}
else
{
const ARM64Reg VB = fpr.R(b, RegType::LowerPair);
const ARM64Reg VD = fpr.RW(d, RegType::DuplicatedSingle);

m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));

SetFPRFIfNeeded(true, VD);
}
}

void JitArm64::fcmpX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);

const bool fprf = SConfig::GetInstance().bFPRF && js.op->wantsFPRF;

const u32 a = inst.FA;
const u32 b = inst.FB;
@@ -299,6 +321,14 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
gpr.BindCRToRegister(crf, false);
const ARM64Reg XA = gpr.CR(crf);

ARM64Reg fpscr_reg;
if (fprf)
{
fpscr_reg = gpr.GetReg();
LDR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr));
ANDI2R(fpscr_reg, fpscr_reg, ~FPRF_MASK);
}

FixupBranch pNaN, pLesser, pGreater;
FixupBranch continue1, continue2, continue3;
ORR(XA, ARM64Reg::ZR, 32, 0, true);
@@ -317,24 +347,33 @@ void JitArm64::fcmpX(UGeckoInstruction inst)

// A == B
ORR(XA, XA, 64 - 63, 0, true);
if (fprf)
ORRI2R(fpscr_reg, fpscr_reg, PowerPC::CR_EQ << FPRF_SHIFT);

continue1 = B();

SetJumpTarget(pNaN);

MOVI2R(XA, PowerPC::ConditionRegister::PPCToInternal(PowerPC::CR_SO));
if (fprf)
ORRI2R(fpscr_reg, fpscr_reg, PowerPC::CR_SO << FPRF_SHIFT);

if (a != b)
{
continue2 = B();

SetJumpTarget(pGreater);
ORR(XA, XA, 0, 0, true);
if (fprf)
ORRI2R(fpscr_reg, fpscr_reg, PowerPC::CR_GT << FPRF_SHIFT);

continue3 = B();

SetJumpTarget(pLesser);
ORR(XA, XA, 64 - 62, 1, true);
ORR(XA, XA, 0, 0, true);
if (fprf)
ORRI2R(fpscr_reg, fpscr_reg, PowerPC::CR_LT << FPRF_SHIFT);

SetJumpTarget(continue2);
SetJumpTarget(continue3);
@@ -343,6 +382,12 @@ void JitArm64::fcmpX(UGeckoInstruction inst)

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
"Register allocation turned singles into doubles in the middle of fcmpX");

if (fprf)
{
STR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscr_reg);
}
}

void JitArm64::fctiwzx(UGeckoInstruction inst)
@@ -371,12 +416,12 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
}
else
{
const ARM64Reg V1 = gpr.GetReg();
const ARM64Reg WA = gpr.GetReg();

m_float_emit.FCVTS(V1, EncodeRegToDouble(VB), RoundingMode::Z);
m_float_emit.FMOV(EncodeRegToSingle(VD), V1);
m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z);
m_float_emit.FMOV(EncodeRegToSingle(VD), WA);

gpr.Unlock(V1);
gpr.Unlock(WA);
}
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
fpr.Unlock(V0);
@@ -76,7 +76,6 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);

const u32 a = inst.FA;
const u32 c = inst.FC;
@@ -99,14 +98,15 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
"Register allocation turned singles into doubles in the middle of ps_mulsX");

fpr.FixSinglePrecision(d);

SetFPRFIfNeeded(true, VD);
}

void JitArm64::ps_maddXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);

const u32 a = inst.FA;
const u32 b = inst.FB;
@@ -257,13 +257,15 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
break;
}

if (V0Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V0Q);

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_maddXX");

fpr.FixSinglePrecision(d);

if (V0Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V0Q);
SetFPRFIfNeeded(true, VD);
}

void JitArm64::ps_sel(UGeckoInstruction inst)
@@ -311,7 +313,6 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);

const u32 a = inst.FA;
const u32 b = inst.FB;
@@ -343,10 +344,12 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
}

fpr.Unlock(V0);

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_sumX");

fpr.FixSinglePrecision(d);

fpr.Unlock(V0);
SetFPRFIfNeeded(true, VD);
}

0 comments on commit 725ea3d

Please sign in to comment.