@@ -4,11 +4,14 @@

#include "Common/Arm64Emitter.h"
#include "Common/CommonTypes.h"
#include "Common/FloatUtils.h"
#include "Common/JitRegister.h"
#include "Common/MathUtil.h"

#include "Core/CoreTiming.h"
#include "Core/HW/CPU.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
@@ -203,6 +206,12 @@ void JitArm64::GenerateCommonAsm()
GenerateConvertSingleToDouble();
JitRegister::Register(GetAsmRoutines()->cstd, GetCodePtr(), "JIT_cstd");

GetAsmRoutines()->fprf_single = GetCodePtr();
GenerateFPRF(true);
GetAsmRoutines()->fprf_double = GetCodePtr();
GenerateFPRF(false);
JitRegister::Register(GetAsmRoutines()->fprf_single, GetCodePtr(), "JIT_FPRF");

GenerateQuantizedLoadStores();
}

@@ -272,6 +281,91 @@ void JitArm64::GenerateConvertSingleToDouble()
RET();
}

// Input in X0. Outputs to memory (PPCState). Clobbers X0-X4 and flags.
void JitArm64::GenerateFPRF(bool single)
{
const auto reg_encoder = single ? EncodeRegTo32 : EncodeRegTo64;

const ARM64Reg input_reg = reg_encoder(ARM64Reg::W0);
const ARM64Reg temp_reg = reg_encoder(ARM64Reg::W1);
const ARM64Reg exp_reg = reg_encoder(ARM64Reg::W2);

constexpr ARM64Reg fprf_reg = ARM64Reg::W3;
constexpr ARM64Reg fpscr_reg = ARM64Reg::W4;

const auto INPUT_EXP_MASK = single ? Common::FLOAT_EXP : Common::DOUBLE_EXP;
const auto INPUT_FRAC_MASK = single ? Common::FLOAT_FRAC : Common::DOUBLE_FRAC;
constexpr u32 OUTPUT_SIGN_MASK = 0xC;

// This code is duplicated for the most common cases for performance.
// For the less common cases, we branch to an existing copy of this code.
auto emit_write_fprf_and_ret = [&] {
BFI(fpscr_reg, fprf_reg, FPRF_SHIFT, FPRF_WIDTH);
STR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr));
RET();
};

// First of all, start the load of the old FPSCR value, in case it takes a while
LDR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr));

CMP(input_reg, 0); // Grab sign bit (conveniently the same bit for floats as for integers)
ANDI2R(exp_reg, input_reg, INPUT_EXP_MASK); // Grab exponent

// Most branches handle the sign in the same way. Perform that handling before branching
MOVI2R(ARM64Reg::W3, Common::PPC_FPCLASS_PN);
MOVI2R(ARM64Reg::W1, Common::PPC_FPCLASS_NN);
CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W3, CCFlags::CC_LT);

FixupBranch zero_or_denormal = CBZ(exp_reg);

// exp != 0
MOVI2R(temp_reg, INPUT_EXP_MASK);
CMP(exp_reg, temp_reg);
FixupBranch nan_or_inf = B(CCFlags::CC_EQ);

// exp != 0 && exp != EXP_MASK
const u8* normal = GetCodePtr();
emit_write_fprf_and_ret();

// exp == 0
SetJumpTarget(zero_or_denormal);
TSTI2R(input_reg, INPUT_FRAC_MASK);
FixupBranch denormal;
if (single)
{
// To match the interpreter, what we output should be based on how the input would be classified
// after conversion to double. Converting a denormal single to a double always results in a
// normal double, so for denormal singles we need to output PPC_FPCLASS_PN/PPC_FPCLASS_NN.
// TODO: Hardware test that the interpreter actually is correct.
B(CCFlags::CC_NEQ, normal);
}
else
{
denormal = B(CCFlags::CC_NEQ);
}

// exp == 0 && frac == 0
LSR(ARM64Reg::W1, fprf_reg, 3);
MOVI2R(fprf_reg, Common::PPC_FPCLASS_PZ & ~OUTPUT_SIGN_MASK);
BFI(fprf_reg, ARM64Reg::W1, 4, 1);
const u8* write_fprf_and_ret = GetCodePtr();
emit_write_fprf_and_ret();

// exp == 0 && frac != 0
if (!single)
SetJumpTarget(denormal);
ORRI2R(fprf_reg, fprf_reg, Common::PPC_FPCLASS_PD & ~OUTPUT_SIGN_MASK);
B(write_fprf_and_ret);

// exp == EXP_MASK
SetJumpTarget(nan_or_inf);
TSTI2R(input_reg, INPUT_FRAC_MASK);
ORRI2R(ARM64Reg::W1, fprf_reg, Common::PPC_FPCLASS_PINF & ~OUTPUT_SIGN_MASK);
MOVI2R(ARM64Reg::W2, Common::PPC_FPCLASS_QNAN);
CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W2, CCFlags::CC_EQ);
B(write_fprf_and_ret);
}

void JitArm64::GenerateQuantizedLoadStores()
{
// X0 is the scale
@@ -27,6 +27,8 @@ struct CommonAsmRoutinesBase
const u8* mfcr;
const u8* cdts;
const u8* cstd;
const u8* fprf_single;
const u8* fprf_double;

// In: array index: GQR to use.
// In: ECX: Address to read from.
@@ -24,6 +24,7 @@ elseif(_M_ARM_64)
add_dolphin_test(PowerPCTest
PowerPC/DivUtilsTest.cpp
PowerPC/JitArm64/ConvertSingleDouble.cpp
PowerPC/JitArm64/FPRF.cpp
PowerPC/JitArm64/MovI2R.cpp
)
else()
@@ -0,0 +1,86 @@
// Copyright 2021 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include <cinttypes>
#include <functional>
#include <vector>

#include "Common/Arm64Emitter.h"
#include "Common/BitUtils.h"
#include "Common/CommonTypes.h"
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/PowerPC.h"

#include "../TestValues.h"

#include <gtest/gtest.h>

namespace
{
using namespace Arm64Gen;

class TestFPRF : public JitArm64
{
public:
TestFPRF()
{
AllocCodeSpace(4096);

const u8* raw_fprf_single = GetCodePtr();
GenerateFPRF(true);
const u8* raw_fprf_double = GetCodePtr();
GenerateFPRF(false);

fprf_single = Common::BitCast<void (*)(u32)>(GetCodePtr());
MOV(ARM64Reg::X15, ARM64Reg::X30);
MOV(ARM64Reg::X14, PPC_REG);
MOVP2R(PPC_REG, &PowerPC::ppcState);
BL(raw_fprf_single);
MOV(ARM64Reg::X30, ARM64Reg::X15);
MOV(PPC_REG, ARM64Reg::X14);
RET();

fprf_double = Common::BitCast<void (*)(u64)>(GetCodePtr());
MOV(ARM64Reg::X15, ARM64Reg::X30);
MOV(ARM64Reg::X14, PPC_REG);
MOVP2R(PPC_REG, &PowerPC::ppcState);
BL(raw_fprf_double);
MOV(ARM64Reg::X30, ARM64Reg::X15);
MOV(PPC_REG, ARM64Reg::X14);
RET();
}

std::function<void(u32)> fprf_single;
std::function<void(u64)> fprf_double;
};

} // namespace

// Seeds the FPSCR with a fixed bit pattern, invokes `f`, and returns the raw FPSCR value that
// is left behind. Starting from the same pattern every time makes results from different
// implementations directly comparable.
static u32 RunUpdateFPRF(const std::function<void()>& f)
{
  auto& fpscr = PowerPC::ppcState.fpscr;

  fpscr.Hex = 0x12345678;
  f();

  return fpscr.Hex;
}

// Checks, for every entry of double_test_values, that the emitted FPRF routines leave the same
// FPSCR value behind as PowerPC::UpdateFPRF does. Each entry is checked once as a double and
// once after being narrowed to a single.
TEST(JitArm64, FPRF)
{
  TestFPRF test;

  for (const u64 double_input : double_test_values)
  {
    // Double precision: reference implementation first, then the emitted routine.
    const auto reference_double = [&] {
      PowerPC::UpdateFPRF(Common::BitCast<double>(double_input));
    };
    const auto emitted_double = [&] { test.fprf_double(double_input); };

    const u32 expected_double = RunUpdateFPRF(reference_double);
    const u32 actual_double = RunUpdateFPRF(emitted_double);
    EXPECT_EQ(expected_double, actual_double);

    // Single precision: the reference classifies the single after widening it back to a double.
    const u32 single_input = ConvertToSingle(double_input);

    const auto reference_single = [&] {
      PowerPC::UpdateFPRF(Common::BitCast<double>(ConvertToDouble(single_input)));
    };
    const auto emitted_single = [&] { test.fprf_single(single_input); };

    const u32 expected_single = RunUpdateFPRF(reference_single);
    const u32 actual_single = RunUpdateFPRF(emitted_single);
    EXPECT_EQ(expected_single, actual_single);
  }
}
@@ -83,6 +83,7 @@
</ItemGroup>
<ItemGroup Condition="'$(Platform)'=='ARM64'">
<ClCompile Include="Core\PowerPC\JitArm64\ConvertSingleDouble.cpp" />
<ClCompile Include="Core\PowerPC\JitArm64\FPRF.cpp" />
<ClCompile Include="Core\PowerPC\JitArm64\MovI2R.cpp" />
</ItemGroup>
<ItemGroup>