Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JitArm64: Skip checking last input for NaN for non-SIMD operations #12092

Merged
merged 2 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
47 changes: 17 additions & 30 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,6 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
const bool fma = use_b && use_c;
const bool negate_result = (op5 & ~0x1) == 30;

// Addition and subtraction can't generate new NaNs, they can only take NaNs from inputs
const bool can_generate_nan = (op5 & ~0x1) != 20;

const bool output_is_single = inst.OPCD == 59;
const bool inaccurate_fma = op5 > 25 && !Config::Get(Config::SESSION_USE_FMA);
const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[inst.FC];
Expand Down Expand Up @@ -203,45 +200,35 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
if (use_c && VA != VC && (!use_b || VB != VC))
inputs.push_back(VC);

// If any inputs are NaNs, pick the first NaN of them and set its quiet bit
for (size_t i = 0; i < inputs.size(); ++i)
// If any inputs are NaNs, pick the first NaN of them and set its quiet bit.
// However, we can skip checking the last input, because if exactly one input is NaN, AArch64
// arithmetic instructions automatically pick that NaN and make it quiet, just like we want.
for (size_t i = 0; i < inputs.size() - 1; ++i)
{
// Skip checking if the input is a NaN if it's the last input and we're guaranteed to have at
// least one NaN input
const bool check_input = can_generate_nan || i != inputs.size() - 1;

const ARM64Reg input = inputs[i];
FixupBranch skip;
if (check_input)
{
m_float_emit.FCMP(input);
skip = B(CCFlags::CC_VC);
}

m_float_emit.FCMP(input);
FixupBranch skip = B(CCFlags::CC_VC);

// Make the NaN quiet
m_float_emit.FADD(VD, input, input);

nan_fixups.push_back(B());

if (check_input)
SetJumpTarget(skip);
SetJumpTarget(skip);
}

std::optional<FixupBranch> nan_early_fixup;
if (can_generate_nan)
if (negate_result)
{
// If we have a NaN, we must not execute FNEG.
if (result_reg != VD)
m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg));
nan_fixups.push_back(B());
}
else
{
// There was no NaN in any of the inputs, so the NaN must have been generated by the
// arithmetic instruction. In this case, the result is already correct.
if (negate_result)
{
if (result_reg != VD)
m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg));
nan_fixups.push_back(B());
}
else
{
nan_early_fixup = B();
}
nan_early_fixup = B();
}

SwitchToNearCode();
Expand Down
56 changes: 10 additions & 46 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,49 +380,21 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
const ARM64Reg VC = fpr.R(c, type);
const ARM64Reg VD = fpr.RW(d, type);
const ARM64Reg V0 = fpr.GetReg();
const ARM64Reg temp_gpr = m_accurate_nans && !singles ? gpr.GetReg() : ARM64Reg::INVALID_REG;

m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(VB), 1);

FixupBranch a_nan_done, b_nan_done;
if (m_accurate_nans)
{
// Emits a NaN check for `input` and, on the NaN path, far code that produces the
// final result directly from the quieted input.
//
// Returns the unconditional branch emitted at the end of the far-code NaN path. The
// caller is responsible for binding it (via SetJumpTarget) to the point where normal
// execution should resume after the NaN result has been written.
const auto check_nan = [&](ARM64Reg input) {
// Compare the scalar view of the input; the CC_VC branch is the "not a NaN" path.
m_float_emit.FCMP(scalar_reg_encoder(input));
FixupBranch not_nan = B(CCFlags::CC_VC);
// Fall-through means the input is a NaN: jump out to the far-code handler below.
FixupBranch nan = B();
SetJumpTarget(not_nan);

// Everything from here until SwitchToNearCode() is emitted in the far code region,
// and is only reached through the `nan` branch.
SwitchToFarCode();
SetJumpTarget(nan);

// Each arm adds the input to itself (the same FADD x, x form used elsewhere in this
// JIT to make a NaN quiet) and then merges the result with VC into VD.
// NOTE(review): the lane placement described below is inferred from the operands;
// confirm against the TRN1/INS emitter semantics.
if (upper)
{
// Quiet the NaN into the temporary, then combine VC and V0 into VD with TRN1.
m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(input),
scalar_reg_encoder(input));
m_float_emit.TRN1(size, reg_encoder(VD), reg_encoder(VC), reg_encoder(V0));
}
else if (d != c)
{
// VD is a different guest register from c, so the quieted NaN can be written to VD
// directly before element 1 is inserted from VC.
m_float_emit.FADD(scalar_reg_encoder(VD), scalar_reg_encoder(input),
scalar_reg_encoder(input));
m_float_emit.INS(size, VD, 1, VC, 1);
}
else
{
// d == c: go through the temporary V0 and insert only element 0 into VD.
// Presumably this avoids a scalar write to VD clobbering the element that must be
// kept from c — TODO confirm.
m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(input),
scalar_reg_encoder(input));
m_float_emit.INS(size, VD, 0, V0, 0);
}

// Leave the far code with a branch whose target the caller fills in later.
FixupBranch nan_done = B();
SwitchToNearCode();

return nan_done;
};

a_nan_done = check_nan(VA);
b_nan_done = check_nan(V0);
// If the first input is NaN, set the temp register for the second input to 0. This is because:
//
// - If the second input is also NaN, setting it to 0 ensures that the first NaN will be picked.
// - If only the first input is NaN, setting the second input to 0 has no effect on the result.
//
// Either way, we can then do an FADD as usual, and the FADD will make the NaN quiet.
m_float_emit.FCMP(scalar_reg_encoder(VA));
FixupBranch a_not_nan = B(CCFlags::CC_VC);
m_float_emit.MOVI(64, scalar_reg_encoder(V0), 0);
SetJumpTarget(a_not_nan);
}

if (upper)
Expand All @@ -441,15 +413,7 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
m_float_emit.INS(size, VD, 0, V0, 0);
}

if (m_accurate_nans)
{
SetJumpTarget(a_nan_done);
SetJumpTarget(b_nan_done);
}

fpr.Unlock(V0);
if (temp_gpr != ARM64Reg::INVALID_REG)
gpr.Unlock(temp_gpr);

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_sumX");
Expand Down