Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #474 from Sonicadvance1/conditional-branch
Support conditional register cache flushing on ARMv7.
  • Loading branch information
Sonicadvance1 committed Jun 10, 2014
2 parents be4b544 + 2989ccf commit 1db93db
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 31 deletions.
33 changes: 24 additions & 9 deletions Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp
Expand Up @@ -149,9 +149,6 @@ void JitArm::bcx(UGeckoInstruction inst)
JITDISABLE(bJITBranchOff)
// USES_CR

gpr.Flush();
fpr.Flush();

ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
FixupBranch pCTRDontBranch;
Expand Down Expand Up @@ -194,6 +191,9 @@ void JitArm::bcx(UGeckoInstruction inst)
destination = SignExt16(inst.BD << 2);
else
destination = js.compilerPC + SignExt16(inst.BD << 2);

gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExit(destination);

if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
Expand All @@ -202,16 +202,17 @@ void JitArm::bcx(UGeckoInstruction inst)
SetJumpTarget( pCTRDontBranch );

if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}
void JitArm::bcctrx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITBranchOff)

gpr.Flush();
fpr.Flush();

// bcctrx doesn't decrement and/or test CTR
_dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!");

Expand All @@ -220,6 +221,9 @@ void JitArm::bcctrx(UGeckoInstruction inst)
// BO_2 == 1z1zz -> b always

//NPC = CTR & 0xfffffffc;
gpr.Flush();
fpr.Flush();

ARMReg rA = gpr.GetReg();

if (inst.LK_3)
Expand Down Expand Up @@ -261,22 +265,26 @@ void JitArm::bcctrx(UGeckoInstruction inst)
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
}
gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);

WriteExitDestInR(rA);

SetJumpTarget(b);

if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}
}
void JitArm::bclrx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITBranchOff)

gpr.Flush();
fpr.Flush();

ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
FixupBranch pCTRDontBranch;
Expand Down Expand Up @@ -326,6 +334,9 @@ void JitArm::bclrx(UGeckoInstruction inst)
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
}
gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR

gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInR(rA);

if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
Expand All @@ -334,5 +345,9 @@ void JitArm::bclrx(UGeckoInstruction inst)
SetJumpTarget( pCTRDontBranch );

if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
}
11 changes: 8 additions & 3 deletions Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp
Expand Up @@ -954,9 +954,6 @@ void JitArm::twx(UGeckoInstruction inst)

s32 a = inst.RA;

gpr.Flush();
fpr.Flush();

ARMReg RA = gpr.GetReg();
ARMReg RB = gpr.GetReg();
MOV(RA, inst.TO);
Expand Down Expand Up @@ -1003,6 +1000,9 @@ void JitArm::twx(UGeckoInstruction inst)
SetJumpTarget(take4);
SetJumpTarget(take5);

gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);

LDR(RA, R9, PPCSTATE_OFF(Exceptions));
MOVI2R(RB, EXCEPTION_PROGRAM); // XXX: Can be optimized
ORR(RA, RA, RB);
Expand All @@ -1016,7 +1016,12 @@ void JitArm::twx(UGeckoInstruction inst)
SetJumpTarget(exit5);

if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();

WriteExit(js.compilerPC + 4);
}

gpr.Unlock(RA, RB);
}
6 changes: 4 additions & 2 deletions Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp
Expand Up @@ -446,12 +446,14 @@ void JitArm::lXX(UGeckoInstruction inst)
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
{
ARMReg RD = gpr.R(d);
gpr.Flush();
fpr.Flush();

// if it's still 0, we can wait until the next event
TST(RD, RD);
FixupBranch noIdle = B_CC(CC_NEQ);

gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);

rA = gpr.GetReg();

MOVI2R(rA, (u32)&PowerPC::OnIdle);
Expand Down
41 changes: 34 additions & 7 deletions Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp
Expand Up @@ -32,6 +32,27 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)

void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	// Reset the cache bookkeeping before a new block is compiled.
	// Conditional branches flush without clearing the register cache state,
	// so entries from the previous block may still be marked as loaded here.
	// No instructions are emitted; only the tracking data is cleared.
	// The stats argument is not used by this function.
	for (u8 preg = 0; preg < 32; ++preg)
	{
		// Every PPC FPR has two cached slots; visit [0] first, then [1].
		for (u8 slot = 0; slot < 2; ++slot)
		{
			if (_regs[preg][slot].GetType() == REG_NOTLOADED)
				continue;

			// Release the host register entry this slot was bound to,
			// using the same reset values that Flush() writes in FLUSH_ALL mode.
			const u32 host = _regs[preg][slot].GetRegIndex();
			ArmCRegs[host].PPCReg = 33;
			ArmCRegs[host].LastLoad = 0;
			_regs[preg][slot].Flush();
		}
	}
}

ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count)
Expand Down Expand Up @@ -162,7 +183,7 @@ ARMReg ArmFPRCache::R1(u32 preg, bool preLoad)
return GetPPCReg(preg, true, preLoad);
}

void ArmFPRCache::Flush()
void ArmFPRCache::Flush(FlushMode mode)
{
for (u8 a = 0; a < 32; ++a)
{
Expand All @@ -172,19 +193,25 @@ void ArmFPRCache::Flush()
u32 regindex = _regs[a][0].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);

ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][0].Flush();
if (mode == FLUSH_ALL)
{
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][0].Flush();
}
}
if (_regs[a][1].GetType() != REG_NOTLOADED)
{
s16 offset = PPCSTATE_OFF(ps) + (a * 16) + 8;
u32 regindex = _regs[a][1].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);

ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][1].Flush();
if (mode == FLUSH_ALL)
{
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][1].Flush();
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/Core/PowerPC/JitArm32/JitFPRCache.h
Expand Up @@ -43,7 +43,7 @@ class ArmFPRCache

ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use.
void Unlock(ARMReg V0);
void Flush();
void Flush(FlushMode mode = FLUSH_ALL);
ARMReg R0(u32 preg, bool preLoad = true); // Returns a cached register
ARMReg R1(u32 preg, bool preLoad = true);
};
40 changes: 34 additions & 6 deletions Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp
Expand Up @@ -30,6 +30,19 @@ void ArmRegCache::Init(ARMXEmitter *emitter)
}
void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	// Wipe the cache bookkeeping at the beginning of every block.
	// Conditional branches flush without clearing the register cache state,
	// so it can still be dirty from the previous block.
	// No instructions are emitted; only the tracking data is cleared.
	// The stats argument is not used by this function.
	for (u8 preg = 0; preg < 32; ++preg)
	{
		const bool boundToHostReg = (regs[preg].GetType() == REG_REG);
		if (boundToHostReg)
		{
			// Release the host register entry, using the same reset values
			// that Flush() writes in FLUSH_ALL mode.
			const u32 host = regs[preg].GetRegIndex();
			ArmCRegs[host].PPCReg = 33;
			ArmCRegs[host].LastLoad = 0;
		}

		// Every entry is reset, whatever type it currently has.
		regs[preg].Flush();
	}
}

ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count)
Expand Down Expand Up @@ -192,21 +205,36 @@ void ArmRegCache::SetImmediate(u32 preg, u32 imm)
regs[preg].LoadToImm(imm);
}

void ArmRegCache::Flush()
void ArmRegCache::Flush(FlushMode mode)
{
for (u8 a = 0; a < 32; ++a)
{
if (regs[a].GetType() == REG_IMM)
BindToRegister(a);
{
if (mode == FLUSH_ALL)
{
// This changes the type over to a REG_REG and gets caught below.
BindToRegister(a);
}
else
{
ARMReg tmp = GetReg();
emit->MOVI2R(tmp, regs[a].GetImm());
emit->STR(tmp, R9, PPCSTATE_OFF(gpr) + a * 4);
Unlock(tmp);
}
}
if (regs[a].GetType() == REG_REG)
{
u32 regindex = regs[a].GetRegIndex();
emit->STR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + a * 4);
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
if (mode == FLUSH_ALL)
{
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
regs[a].Flush();
}
}

regs[a].Flush();
}
}

11 changes: 8 additions & 3 deletions Source/Core/Core/PowerPC/JitArm32/JitRegCache.h
Expand Up @@ -29,6 +29,12 @@ enum RegType
REG_AWAY, // Bound to a register, but not preloaded
};

// Selects how the register caches' Flush() treats their bookkeeping.
enum FlushMode
{
	// Emit the stores for cached values and reset the cache state afterwards.
	// This is the default mode of Flush().
	FLUSH_ALL = 0,
	// Emit the stores for cached values but leave the cache state untouched,
	// so code emitted after a conditional exit can keep using it.
	FLUSH_MAINTAIN_STATE = 1,
};

class OpArg
{
private:
Expand Down Expand Up @@ -116,9 +122,8 @@ class ArmRegCache
void Start(PPCAnalyst::BlockRegStats &stats);

ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use.
void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 =
INVALID_REG);
void Flush();
void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 = INVALID_REG);
void Flush(FlushMode mode = FLUSH_ALL);
ARMReg R(u32 preg); // Returns a cached register
bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; }
u32 GetImm(u32 preg) { return regs[preg].GetImm(); }
Expand Down

0 comments on commit 1db93db

Please sign in to comment.