diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index d6579811acfb..d7153a498443 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -245,7 +245,7 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
   std::map m_fault_to_handler;
   std::map m_handler_to_loc;
   Arm64GPRCache gpr;
-  Arm64FPRCache fpr;
+  Arm64FPRCache fpr{gpr};
 
   JitArm64BlockCache blocks{*this};
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
index 734f60b5bc31..e7e72725c6e7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -13,6 +13,7 @@
 #include "Common/BitSet.h"
 #include "Common/CommonTypes.h"
 #include "Common/Logging/Log.h"
+#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
 #include "Core/PowerPC/JitArm64/Jit.h"
 
 using namespace Arm64Gen;
@@ -381,7 +382,7 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg)
 // FPR Cache
 constexpr size_t GUEST_FPR_COUNT = 32;
 
-Arm64FPRCache::Arm64FPRCache() : Arm64RegCache(GUEST_FPR_COUNT)
+Arm64FPRCache::Arm64FPRCache(const Arm64GPRCache& gpr) : Arm64RegCache(GUEST_FPR_COUNT), m_gpr(gpr)
 {
 }
 
@@ -756,22 +757,71 @@ void Arm64FPRCache::FixSinglePrecision(size_t preg)
   }
 }
 
+// Since the following float conversion functions are used in non-arithmetic PPC float
+// instructions, they must convert floats bitexact and never flush denormals to zero or turn SNaNs
+// into QNaNs. This means we can't use FCVT/FCVTL/FCVTN.
+
+// Another problem is that officially, converting doubles to single format results in undefined
+// behavior. Relying on undefined behavior is a bug so no software should ever do this.
+// Super Mario 64 (on Wii VC) accidentally relies on this behavior. See issue #11173.
+
+// When calling the conversion functions, we are cheating a little and not
+// saving the FPRs since we know the functions happen to not use them.
+
 void Arm64FPRCache::ConvertDoubleToSingleLower(ARM64Reg dest_reg, ARM64Reg src_reg)
 {
-  m_float_emit->FCVT(32, 64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
+  const BitSet32 gpr_saved = m_gpr.GetCallerSavedUsed();
+  m_emit->ABI_PushRegisters(gpr_saved);
+
+  m_float_emit->UMOV(64, X0, src_reg, 0);
+  m_emit->QuickCallFunction(X1, &ConvertToSingle);
+  m_float_emit->INS(32, dest_reg, 0, W0);
+
+  m_emit->ABI_PopRegisters(gpr_saved);
 }
 
 void Arm64FPRCache::ConvertDoubleToSinglePair(ARM64Reg dest_reg, ARM64Reg src_reg)
 {
-  m_float_emit->FCVTN(32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
+  const BitSet32 gpr_saved = m_gpr.GetCallerSavedUsed();
+  m_emit->ABI_PushRegisters(gpr_saved);
+
+  // Lane 0 must be converted first so that this also works when dest_reg == src_reg.
+  m_float_emit->UMOV(64, X0, src_reg, 0);
+  m_emit->QuickCallFunction(X1, &ConvertToSingle);
+  m_float_emit->INS(32, dest_reg, 0, W0);
+
+  m_float_emit->UMOV(64, X0, src_reg, 1);
+  m_emit->QuickCallFunction(X1, &ConvertToSingle);
+  m_float_emit->INS(32, dest_reg, 1, W0);
+
+  m_emit->ABI_PopRegisters(gpr_saved);
 }
 
 void Arm64FPRCache::ConvertSingleToDoubleLower(ARM64Reg dest_reg, ARM64Reg src_reg)
 {
-  m_float_emit->FCVT(64, 32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
+  const BitSet32 gpr_saved = m_gpr.GetCallerSavedUsed();
+  m_emit->ABI_PushRegisters(gpr_saved);
+
+  m_float_emit->UMOV(32, W0, src_reg, 0);
+  m_emit->QuickCallFunction(X1, &ConvertToDouble);
+  m_float_emit->INS(64, dest_reg, 0, X0);
+
+  m_emit->ABI_PopRegisters(gpr_saved);
 }
 
 void Arm64FPRCache::ConvertSingleToDoublePair(ARM64Reg dest_reg, ARM64Reg src_reg)
 {
-  m_float_emit->FCVTL(64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
+  const BitSet32 gpr_saved = m_gpr.GetCallerSavedUsed();
+  m_emit->ABI_PushRegisters(gpr_saved);
+
+  // Lane 1 must be converted first so that this also works when dest_reg == src_reg.
+  m_float_emit->UMOV(32, W0, src_reg, 1);
+  m_emit->QuickCallFunction(X1, &ConvertToDouble);
+  m_float_emit->INS(64, dest_reg, 1, X0);
+
+  m_float_emit->UMOV(32, W0, src_reg, 0);
+  m_emit->QuickCallFunction(X1, &ConvertToDouble);
+  m_float_emit->INS(64, dest_reg, 0, X0);
+
+  m_emit->ABI_PopRegisters(gpr_saved);
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
index 7584ef7ad981..8c866a81293f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@@ -274,7 +274,7 @@ class Arm64GPRCache : public Arm64RegCache
 class Arm64FPRCache : public Arm64RegCache
 {
 public:
-  Arm64FPRCache();
+  explicit Arm64FPRCache(const Arm64GPRCache& gpr);
 
   // Flushes the register cache in different ways depending on the mode
   void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr) override;
@@ -311,4 +311,6 @@ class Arm64FPRCache : public Arm64RegCache
   bool IsCalleeSaved(Arm64Gen::ARM64Reg reg) const;
 
   void FlushRegisters(BitSet32 regs, bool maintain_state);
+
+  const Arm64GPRCache& m_gpr;
 };