Permalink
Browse files

Jit64AsmCommon: Reduce branches in fast-path and inline most behavior

The only behavior requires a function call is denormal behaviour. We also fix fpscr exception raising.
  • Loading branch information...
MerryMage committed Sep 23, 2018
1 parent 826bcad commit 89a0b0de32101ad88ae4b56fdd60f72b98a6c7ae
Showing with 43 additions and 17 deletions.
  1. +43 −17 Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -32,17 +32,14 @@ void CommonAsmRoutines::GenFrsqrte()
// This function clobbers all three RSCRATCH.
MOVQ_xmm(R(RSCRATCH), XMM0);
// Negative and zero inputs set an exception and take the complex path.
TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch zero = J_CC(CC_Z, true);
FixupBranch negative = J_CC(CC_S, true);
// Extract exponent
MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
// Zero and max exponents (non-normal floats) take the complex path.
FixupBranch complex1 = J_CC(CC_Z, true);
CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
FixupBranch complex2 = J_CC(CC_E, true);
// Negatives, zeros, denormals, infinities and NaNs take the complex path.
LEA(32, RSCRATCH2, MDisp(RSCRATCH_EXTRA, -1));
CMP(32, R(RSCRATCH2), Imm32(0x7FE));
FixupBranch complex = J_CC(CC_AE, true);
SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD));
SAR(32, R(RSCRATCH_EXTRA), Imm8(1));
@@ -75,24 +72,53 @@ void CommonAsmRoutines::GenFrsqrte()
MOVQ_xmm(XMM0, R(RSCRATCH2));
RET();
// Exception flags for zero input.
SetJumpTarget(zero);
SetJumpTarget(complex);
AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
FixupBranch nan_or_inf = J_CC(CC_E);
MOV(64, R(RSCRATCH2), R(RSCRATCH));
SHL(64, R(RSCRATCH2), Imm8(1));
FixupBranch nonzero = J_CC(CC_NZ);
// +0.0 or -0.0
TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
FixupBranch skip_set_fx1 = J_CC(CC_NZ);
OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX));
FixupBranch complex3 = J();
SetJumpTarget(skip_set_fx1);
MOV(64, R(RSCRATCH2), Imm64(0x7FF0'0000'0000'0000));
OR(64, R(RSCRATCH2), R(RSCRATCH));
MOVQ_xmm(XMM0, R(RSCRATCH2));
RET();
// SNaN or QNaN or +Inf or -Inf
SetJumpTarget(nan_or_inf);
MOV(64, R(RSCRATCH2), R(RSCRATCH));
SHL(64, R(RSCRATCH2), Imm8(12));
FixupBranch inf = J_CC(CC_Z);
BTS(64, R(RSCRATCH), Imm8(51));
MOVQ_xmm(XMM0, R(RSCRATCH));
RET();
SetJumpTarget(inf);
BT(64, R(RSCRATCH), Imm8(63));
FixupBranch negative = J_CC(CC_C);
XORPD(XMM0, R(XMM0));
RET();
SetJumpTarget(nonzero);
FixupBranch denormal = J_CC(CC_NC);
// Exception flags for negative input.
// Negative sign
SetJumpTarget(negative);
TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT));
FixupBranch skip_set_fx2 = J_CC(CC_NZ);
OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_VXSQRT));
SetJumpTarget(skip_set_fx1);
SetJumpTarget(skip_set_fx2);
SetJumpTarget(complex1);
SetJumpTarget(complex2);
SetJumpTarget(complex3);
MOV(64, R(RSCRATCH2), Imm64(0x7FF8'0000'0000'0000));
MOVQ_xmm(XMM0, R(RSCRATCH2));
RET();
SetJumpTarget(denormal);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunction(Common::ApproximateReciprocalSquareRoot);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);

0 comments on commit 89a0b0d

Please sign in to comment.