dolphin-emu · lioncash · Nov 28, 2023 · Aug 9, 2023 · Aug 10, 2023
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@@ -80,9 +80,6 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
   const bool fma = use_b && use_c;
   const bool negate_result = (op5 & ~0x1) == 30;
 
-  // Addition and subtraction can't generate new NaNs, they can only take NaNs from inputs
-  const bool can_generate_nan = (op5 & ~0x1) != 20;
-
   const bool output_is_single = inst.OPCD == 59;
   const bool inaccurate_fma = op5 > 25 && !Config::Get(Config::SESSION_USE_FMA);
   const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[inst.FC];
@@ -203,45 +200,35 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
     if (use_c && VA != VC && (!use_b || VB != VC))
       inputs.push_back(VC);
 
-    // If any inputs are NaNs, pick the first NaN of them and set its quiet bit
-    for (size_t i = 0; i < inputs.size(); ++i)
+    // If any inputs are NaNs, pick the first NaN of them and set its quiet bit.
+    // However, we can skip checking the last input, because if exactly one input is NaN, AArch64
+    // arithmetic instructions automatically pick that NaN and make it quiet, just like we want.
+    for (size_t i = 0; i < inputs.size() - 1; ++i)
     {
-      // Skip checking if the input is a NaN if it's the last input and we're guaranteed to have at
-      // least one NaN input
-      const bool check_input = can_generate_nan || i != inputs.size() - 1;
-
       const ARM64Reg input = inputs[i];
-      FixupBranch skip;
-      if (check_input)
-      {
-        m_float_emit.FCMP(input);
-        skip = B(CCFlags::CC_VC);
-      }
+
+      m_float_emit.FCMP(input);
+      FixupBranch skip = B(CCFlags::CC_VC);
 
       // Make the NaN quiet
       m_float_emit.FADD(VD, input, input);
 
       nan_fixups.push_back(B());
 
-      if (check_input)
-        SetJumpTarget(skip);
+      SetJumpTarget(skip);
     }
 
     std::optional<FixupBranch> nan_early_fixup;
-    if (can_generate_nan)
+    if (negate_result)
+    {
+      // The result is already the NaN we want. Use it as is; we must not execute FNEG.
+      if (result_reg != VD)
+        m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg));
+      nan_fixups.push_back(B());
+    }
+    else
     {
-      // There was no NaN in any of the inputs, so the NaN must have been generated by the
-      // arithmetic instruction. In this case, the result is already correct.
-      if (negate_result)
-      {
-        if (result_reg != VD)
-          m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg));
-        nan_fixups.push_back(B());
-      }
-      else
-      {
-        nan_early_fixup = B();
-      }
+      nan_early_fixup = B();
     }
 
     SwitchToNearCode();

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
@@ -380,49 +380,21 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
   const ARM64Reg VC = fpr.R(c, type);
   const ARM64Reg VD = fpr.RW(d, type);
   const ARM64Reg V0 = fpr.GetReg();
-  const ARM64Reg temp_gpr = m_accurate_nans && !singles ? gpr.GetReg() : ARM64Reg::INVALID_REG;
 
   m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(VB), 1);
 
-  FixupBranch a_nan_done, b_nan_done;
   if (m_accurate_nans)
   {
-    const auto check_nan = [&](ARM64Reg input) {
-      m_float_emit.FCMP(scalar_reg_encoder(input));
-      FixupBranch not_nan = B(CCFlags::CC_VC);
-      FixupBranch nan = B();
-      SetJumpTarget(not_nan);
-
-      SwitchToFarCode();
-      SetJumpTarget(nan);
-
-      if (upper)
-      {
-        m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(input),
-                          scalar_reg_encoder(input));
-        m_float_emit.TRN1(size, reg_encoder(VD), reg_encoder(VC), reg_encoder(V0));
-      }
-      else if (d != c)
-      {
-        m_float_emit.FADD(scalar_reg_encoder(VD), scalar_reg_encoder(input),
-                          scalar_reg_encoder(input));
-        m_float_emit.INS(size, VD, 1, VC, 1);
-      }
-      else
-      {
-        m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(input),
-                          scalar_reg_encoder(input));
-        m_float_emit.INS(size, VD, 0, V0, 0);
-      }
-
-      FixupBranch nan_done = B();
-      SwitchToNearCode();
-
-      return nan_done;
-    };
-
-    a_nan_done = check_nan(VA);
-    b_nan_done = check_nan(V0);
+    // If the first input is NaN, set the temp register for the second input to 0. This is because:
+    //
+    // - If the second input is also NaN, setting it to 0 ensures that the first NaN will be picked.
+    // - If only the first input is NaN, setting the second input to 0 has no effect on the result.
+    //
+    // Either way, we can then do an FADD as usual, and the FADD will make the NaN quiet.
+    m_float_emit.FCMP(scalar_reg_encoder(VA));
+    FixupBranch a_not_nan = B(CCFlags::CC_VC);
+    m_float_emit.MOVI(64, scalar_reg_encoder(V0), 0);
+    SetJumpTarget(a_not_nan);
   }
 
   if (upper)
@@ -441,15 +413,7 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
     m_float_emit.INS(size, VD, 0, V0, 0);
   }
 
-  if (m_accurate_nans)
-  {
-    SetJumpTarget(a_nan_done);
-    SetJumpTarget(b_nan_done);
-  }
-
   fpr.Unlock(V0);
-  if (temp_gpr != ARM64Reg::INVALID_REG)
-    gpr.Unlock(temp_gpr);
 
   ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
              "Register allocation turned singles into doubles in the middle of ps_sumX");