Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #9472 from JosJuice/jitarm64-unexpected-double
JitArm64: Update registers last used before start of instruction
  • Loading branch information
leoetlino committed Jan 27, 2021
2 parents 2a6fffd + d004304 commit 2d75b0d
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 10 deletions.
9 changes: 9 additions & 0 deletions Source/Core/Core/PowerPC/JitArm64/Jit.cpp
Expand Up @@ -694,6 +694,15 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (!SConfig::GetInstance().bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);

// Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
gpr.UpdateLastUsed(op.regsIn | op.regsOut);

BitSet32 fpr_used = op.fregsIn;
if (op.fregOut >= 0)
fpr_used[op.fregOut] = true;
fpr.UpdateLastUsed(fpr_used);

// Gather pipe writes using a non-immediate address are discovered by profiling.
bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end();

Expand Down
39 changes: 32 additions & 7 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
Expand Up @@ -32,8 +32,11 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
bool use_b = op5 != 25; // fmul uses no B

bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
(!use_c || fpr.IsSingle(c, !packed));
const auto inputs_are_singles_func = [&] {
return fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
(!use_c || fpr.IsSingle(c, !packed));
};
const bool inputs_are_singles = inputs_are_singles_func();

ARM64Reg VA{}, VB{}, VC{}, VD{};

Expand Down Expand Up @@ -117,6 +120,9 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
}
}

ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
"Register allocation turned singles into doubles in the middle of fp_arith");

if (single || packed)
fpr.FixSinglePrecision(d);
}
Expand Down Expand Up @@ -196,6 +202,9 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
break;
}
}

ASSERT_MSG(DYNA_REC, single == fpr.IsSingle(b, !packed),
"Register allocation turned singles into doubles in the middle of fp_logic");
}

void JitArm64::fselx(UGeckoInstruction inst)
Expand All @@ -209,6 +218,7 @@ void JitArm64::fselx(UGeckoInstruction inst)
const u32 c = inst.FC;
const u32 d = inst.FD;

const bool a_single = fpr.IsSingle(a, true);
if (fpr.IsSingle(a, true))
{
const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle);
Expand All @@ -220,15 +230,20 @@ void JitArm64::fselx(UGeckoInstruction inst)
m_float_emit.FCMPE(EncodeRegToDouble(VA));
}

const bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair;
const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble;
const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
const RegType type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair;
const auto reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble;

const ARM64Reg VB = fpr.R(b, type);
const ARM64Reg VC = fpr.R(c, type);
const ARM64Reg VD = fpr.RW(d, type);

m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE);

ASSERT_MSG(DYNA_REC,
a_single == fpr.IsSingle(a, true) &&
b_and_c_singles == (fpr.IsSingle(b, true) && fpr.IsSingle(c, true)),
"Register allocation turned singles into doubles in the middle of fselx");
}

void JitArm64::frspx(UGeckoInstruction inst)
Expand All @@ -241,7 +256,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
const u32 b = inst.FB;
const u32 d = inst.FD;

if (fpr.IsSingle(b, true))
const bool single = fpr.IsSingle(b, true);
if (single)
{
// Source is already in single precision, so no need to do anything but to copy to PSR1.
const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle);
Expand All @@ -257,6 +273,9 @@ void JitArm64::frspx(UGeckoInstruction inst)

m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
}

ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
"Register allocation turned singles into doubles in the middle of frspx");
}

void JitArm64::fcmpX(UGeckoInstruction inst)
Expand Down Expand Up @@ -320,6 +339,9 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
SetJumpTarget(continue3);
}
SetJumpTarget(continue1);

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
"Register allocation turned singles into doubles in the middle of fcmpX");
}

void JitArm64::fctiwzx(UGeckoInstruction inst)
Expand All @@ -334,7 +356,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
const bool single = fpr.IsSingle(b, true);

const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
const ARM64Reg VD = fpr.RW(d);
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);

const ARM64Reg V0 = fpr.GetReg();

Expand All @@ -357,4 +379,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
}
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
fpr.Unlock(V0);

ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
"Register allocation turned singles into doubles in the middle of fctiwzx");
}
16 changes: 16 additions & 0 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
Expand Up @@ -66,6 +66,9 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
break;
}

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b)),
"Register allocation turned singles into doubles in the middle of ps_mergeXX");
}

void JitArm64::ps_mulsX(UGeckoInstruction inst)
Expand All @@ -92,6 +95,9 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)

m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0);

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_mulsX");

fpr.FixSinglePrecision(d);
}

Expand Down Expand Up @@ -250,6 +256,10 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op");
break;
}

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_maddXX");

fpr.FixSinglePrecision(d);

if (V0Q != INVALID_REG)
Expand Down Expand Up @@ -291,6 +301,9 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
m_float_emit.MOV(VD, V0);
fpr.Unlock(V0Q);
}

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_sel");
}

void JitArm64::ps_sumX(UGeckoInstruction inst)
Expand Down Expand Up @@ -330,6 +343,9 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
}

ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_sumX");

fpr.FixSinglePrecision(d);

fpr.Unlock(V0);
Expand Down
14 changes: 13 additions & 1 deletion Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
Expand Up @@ -41,10 +41,22 @@ ARM64Reg Arm64RegCache::GetReg()
// Holy cow, how did you run out of registers?
// We can't return anything reasonable in this case. Return INVALID_REG and watch the failure
// happen
WARN_LOG_FMT(DYNA_REC, "All available registers are locked dumb dumb");
ASSERT_MSG(DYNA_REC, 0, "All available registers are locked!");
return INVALID_REG;
}

// Walks every entry of m_guest_registers once. Entries whose index is flagged in
// regs_used get ResetLastUsed(); all remaining entries get IncrementLastUsed().
void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
{
  const size_t guest_reg_count = m_guest_registers.size();
  for (size_t index = 0; index != guest_reg_count; ++index)
  {
    OpArg& guest_reg = m_guest_registers[index];

    // regs_used is a BitSet32, so it is only consulted for indices below 32;
    // any entry at index 32 or above is always handled as "not used".
    const bool was_used = index < 32 && regs_used[index];
    if (!was_used)
    {
      guest_reg.IncrementLastUsed();
      continue;
    }

    guest_reg.ResetLastUsed();
  }
}

u32 Arm64RegCache::GetUnlockedRegisterCount() const
{
u32 unlocked_registers = 0;
Expand Down
6 changes: 4 additions & 2 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
Expand Up @@ -140,6 +140,8 @@ class Arm64RegCache
// Requires unlocking after done
Arm64Gen::ARM64Reg GetReg();

void UpdateLastUsed(BitSet32 regs_used);

// Locks a register so a cache cannot use it
// Useful for function calls
template <typename T = Arm64Gen::ARM64Reg, typename... Args>
Expand Down Expand Up @@ -281,9 +283,9 @@ class Arm64FPRCache : public Arm64RegCache

// Returns a guest register inside of a host register
// Will dump an immediate to the host register as well
Arm64Gen::ARM64Reg R(size_t preg, RegType type = RegType::LowerPair);
Arm64Gen::ARM64Reg R(size_t preg, RegType type);

Arm64Gen::ARM64Reg RW(size_t preg, RegType type = RegType::LowerPair);
Arm64Gen::ARM64Reg RW(size_t preg, RegType type);

BitSet32 GetCallerSavedUsed() const override;

Expand Down

0 comments on commit 2d75b0d

Please sign in to comment.