Merge pull request #18081 from unknownbrackets/arm64jit-float
arm64jit: Implement some float compares and conversions
hrydgard committed Sep 6, 2023
2 parents f2512e0 + 01ed48a commit 1bfa566
Showing 6 changed files with 189 additions and 7 deletions.
61 changes: 61 additions & 0 deletions Common/Arm64Emitter.cpp
@@ -3011,6 +3011,12 @@ void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
EmitThreeSame(1, 2, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
EmitThreeSame(1, 3, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
{
u32 imm5 = 0;
@@ -3184,6 +3190,61 @@ void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);
}

void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(true, size >> 4, 0b10001, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(false, size >> 4, 0b00111, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(false, size >> 4, 0b00110, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(true, size >> 4, 0b00110, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(true, size >> 4, 0b00111, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(false, size >> 4, 0b10001, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01001, Rd, Rn);
}

void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01000, Rd, Rn);
}

void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01000, Rd, Rn);
}

void ARM64FloatEmitter::CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01001, Rd, Rn);
}

void ARM64FloatEmitter::CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01010, Rd, Rn);
}

// Move
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
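The new BIT, BIF, and integer-compare emitters above are what the FPU JIT changes below lean on. As context: BIT inserts bits from its second operand into the destination wherever the third operand's bits are set, BIF does the same where they are clear, and the one-operand CMLT turns each lane into an all-ones or all-zeros mask based on its sign bit. A minimal sketch of that per-lane CMLT result, written as plain C++ for a single 32-bit lane — the helper name is invented for illustration and is not part of the commit:

#include <cstdint>

// All ones if the lane, read as a signed integer, is negative (sign bit set);
// otherwise all zeros. This is the mask the unordered min/max path uses below.
static inline uint32_t cmlt_zero_lane32(uint32_t lane) {
    return (int32_t)lane < 0 ? 0xFFFFFFFFu : 0x00000000u;
}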
14 changes: 14 additions & 0 deletions Common/Arm64Emitter.h
@@ -851,6 +851,8 @@ class ARM64FloatEmitter
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@@ -894,6 +896,18 @@ class ARM64FloatEmitter
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);

void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);

// Move
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
7 changes: 3 additions & 4 deletions Core/MIPS/ARM64/Arm64IRAsm.cpp
@@ -240,12 +240,11 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
for (size_t i = 0; i < ARRAY_SIZE(roundModes); ++i) {
convertS0ToSCRATCH1_[i] = AlignCode16();

// Invert 0x80000000 -> 0x7FFFFFFF for the NAN result.
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF2), 0x80, 24);
fp_.FCMP(S0, S0); // Detect NaN
fp_.FCVTS(S0, S0, roundModes[i]);
FixupBranch skip = B(CC_VC);
MOVI2R(SCRATCH2, 0x7FFFFFFF);
fp_.FMOV(S0, SCRATCH2);
SetJumpTarget(skip);
fp_.FCSEL(S0, S0, SCRATCHF2, CC_VC);

RET();
}
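The rewrite above replaces the branch-and-FMOV sequence with a branchless pattern: MVNI with immediate 0x80 shifted left by 24 materializes the bitwise NOT of 0x80000000, i.e. 0x7FFFFFFF, in SCRATCHF2; FCMP of S0 against itself sets the unordered flags only when S0 is NaN; and FCSEL keeps the converted value when the comparison was ordered (CC_VC), substituting 0x7FFFFFFF otherwise. A minimal scalar sketch of that behavior, with std::lrintf standing in for the hardware conversion (an illustrative assumption, not the commit's code):

#include <cmath>
#include <cstdint>

static inline int32_t convert_s0_psp(float x) {
    if (std::isnan(x)) {
        // ~(0x80u << 24) == 0x7FFFFFFF, the constant MVNI builds per lane;
        // FCSEL selects it whenever FCMP left the V flag set.
        return 0x7FFFFFFF;
    }
    // The real stub uses FCVTS with one of several rounding modes and
    // saturates in hardware; lrintf is only a stand-in here.
    return (int32_t)std::lrintf(x);
}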
108 changes: 105 additions & 3 deletions Core/MIPS/ARM64/Arm64IRCompFPU.cpp
@@ -116,15 +116,55 @@ void Arm64JitBackend::CompIR_FCompare(IRInst inst) {
case IROp::FCmp:
switch (inst.dest) {
case IRFpCompareMode::False:
regs_.SetGPRImm(IRREG_FPCOND, 0);
break;

case IRFpCompareMode::EitherUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_VS);
break;

case IRFpCompareMode::EqualOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_EQ);
break;

case IRFpCompareMode::EqualUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_EQ);
// If ordered, use the above result. If unordered, use ZR+1 (being 1.)
CSINC(regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND), WZR, CC_VC);
break;

case IRFpCompareMode::LessEqualOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_LS);
break;

case IRFpCompareMode::LessEqualUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_LE);
break;

case IRFpCompareMode::LessOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_LO);
break;

case IRFpCompareMode::LessUnordered:
CompIR_Generic(inst);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_LT);
break;

default:
_assert_msg_(false, "Unexpected IRFpCompareMode %d", inst.dest);
}
break;
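Each implemented compare mode above maps an FCMP result onto IRREG_FPCOND through a single CSET, plus a CSINC for the equal-or-unordered case. A small illustrative sketch of what the EqualUnordered sequence computes, written against ordinary IEEE comparison semantics — the helper is hypothetical and only mirrors the flag logic:

#include <cmath>
#include <cstdint>

static inline uint32_t fpcond_equal_unordered(float a, float b) {
    bool unordered = std::isnan(a) || std::isnan(b); // FCMP leaves V set (CC_VS)
    bool equal = !unordered && a == b;               // CC_EQ after FCMP
    // CSET writes `equal`; CSINC keeps that result when ordered (CC_VC)
    // and otherwise replaces it with WZR + 1, i.e. 1.
    return (unordered || equal) ? 1u : 0u;
}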

@@ -143,24 +183,68 @@ void Arm64JitBackend::CompIR_FCompare(IRInst inst) {
void Arm64JitBackend::CompIR_FCondAssign(IRInst inst) {
CONDITIONAL_DISABLE;

// For Vec4, we could basically just ORR FCMPGE/FCMPLE together, but overlap is trickier.
regs_.Map(inst);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
FixupBranch unordered = B(CC_VS);

switch (inst.op) {
case IROp::FMin:
fp_.FMIN(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
break;

case IROp::FMax:
CompIR_Generic(inst);
fp_.FMAX(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
break;

default:
INVALIDOP;
break;
}

FixupBranch orderedDone = B();

// Not sure if this path is fast, trying to optimize it to be small but correct.
// Probably an uncommon path.
SetJumpTarget(unordered);
fp_.AND(EncodeRegToDouble(SCRATCHF1), regs_.FD(inst.src1), regs_.FD(inst.src2));
// SCRATCHF1 = 0xFFFFFFFF if sign bit set on both, 0x00000000 otherwise.
fp_.CMLT(32, EncodeRegToDouble(SCRATCHF1), EncodeRegToDouble(SCRATCHF1));

switch (inst.op) {
case IROp::FMin:
fp_.SMAX(32, EncodeRegToDouble(SCRATCHF2), regs_.FD(inst.src1), regs_.FD(inst.src2));
fp_.SMIN(32, regs_.FD(inst.dest), regs_.FD(inst.src1), regs_.FD(inst.src2));
break;

case IROp::FMax:
fp_.SMIN(32, EncodeRegToDouble(SCRATCHF2), regs_.FD(inst.src1), regs_.FD(inst.src2));
fp_.SMAX(32, regs_.FD(inst.dest), regs_.FD(inst.src1), regs_.FD(inst.src2));
break;

default:
INVALIDOP;
break;
}
// Replace dest with SCRATCHF2 if both were less than zero.
fp_.BIT(regs_.FD(inst.dest), EncodeRegToDouble(SCRATCHF2), EncodeRegToDouble(SCRATCHF1));

SetJumpTarget(orderedDone);
}
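The fallback above runs only when FCMP reports an unordered result, i.e. a NaN operand. It works on the raw bit patterns: signed-integer min/max give the right answer for non-negative float patterns, and the CMLT mask flips the choice when both sign bits are set, because the signed-integer order of negative float patterns is reversed. A compact sketch of the FMin side of that path, assuming 32-bit lanes; the helper name is invented for illustration:

#include <algorithm>
#include <cstdint>

static inline uint32_t fmin_unordered_path(uint32_t a, uint32_t b) {
    // CMLT(32) of (a & b) against zero: all ones iff both sign bits are set.
    uint32_t bothNeg = ((int32_t)(a & b) < 0) ? 0xFFFFFFFFu : 0u;
    uint32_t smin = (uint32_t)std::min((int32_t)a, (int32_t)b);  // dest
    uint32_t smax = (uint32_t)std::max((int32_t)a, (int32_t)b);  // SCRATCHF2
    // BIT: insert smax into the result wherever the mask bits are set.
    return (smin & ~bothNeg) | (smax & bothNeg);
}

The FMax case is the mirror image, with SMIN landing in SCRATCHF2 and SMAX in the destination.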

void Arm64JitBackend::CompIR_FCvt(IRInst inst) {
CONDITIONAL_DISABLE;

switch (inst.op) {
case IROp::FCvtWS:
CompIR_Generic(inst);
break;

case IROp::FCvtSW:
regs_.Map(inst);
fp_.SCVTF(regs_.F(inst.dest), regs_.F(inst.src1));
break;

case IROp::FCvtScaledWS:
case IROp::FCvtScaledSW:
CompIR_Generic(inst);
@@ -175,18 +259,36 @@ void Arm64JitBackend::CompIR_FCvt(IRInst inst) {
void Arm64JitBackend::CompIR_FRound(IRInst inst) {
CONDITIONAL_DISABLE;

regs_.Map(inst);
// Invert 0x80000000 -> 0x7FFFFFFF for the NAN result.
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF1), 0x80, 24);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src1));

// Luckily, these already saturate.
switch (inst.op) {
case IROp::FRound:
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.src1), ROUND_N);
break;

case IROp::FTrunc:
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.src1), ROUND_Z);
break;

case IROp::FCeil:
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.src1), ROUND_P);
break;

case IROp::FFloor:
CompIR_Generic(inst);
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.src1), ROUND_M);
break;

default:
INVALIDOP;
break;
}

// Switch to INT_MAX if it was NAN.
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF1, CC_VC);
}
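The four FCVTS calls above differ only in their rounding mode. A brief sketch of how those modes correspond to C library rounding, assuming the default round-to-nearest floating-point environment (illustrative only, not part of the commit):

#include <cmath>

static inline float apply_round_mode(float x, char mode) {
    switch (mode) {
    case 'N': return std::nearbyintf(x); // ROUND_N: to nearest, ties to even
    case 'Z': return std::truncf(x);     // ROUND_Z: toward zero
    case 'P': return std::ceilf(x);      // ROUND_P: toward +infinity
    case 'M': return std::floorf(x);     // ROUND_M: toward -infinity
    default:  return x;
    }
}

The conversions saturate to INT32_MIN/INT32_MAX on their own, and the trailing FCSEL overrides the result with 0x7FFFFFFF whenever the FCMP at the top detected a NaN input.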

void Arm64JitBackend::CompIR_FSat(IRInst inst) {
3 changes: 3 additions & 0 deletions Core/MIPS/RiscV/RiscVCompFPU.cpp
@@ -405,6 +405,9 @@ void RiscVJitBackend::CompIR_FCompare(IRInst inst) {
SEQZ(regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND));
regs_.MarkGPRDirty(IRREG_FPCOND, true);
break;

default:
_assert_msg_(false, "Unexpected IRFpCompareMode %d", inst.dest);
}
break;

3 changes: 3 additions & 0 deletions Core/MIPS/x86/X64IRCompFPU.cpp
@@ -322,6 +322,9 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {
// B/CF = LESS THAN or UNORDERED.
ccToFpcond(inst.src1, inst.src2, CC_B);
break;

default:
_assert_msg_(false, "Unexpected IRFpCompareMode %d", inst.dest);
}
break;

