Skip to content

Commit

Permalink
An attempt to combine FPU regcache writebacks with VSTMIA. Disabled d…
Browse files Browse the repository at this point in the history
…ue to bugs.
  • Loading branch information
hrydgard committed Mar 11, 2014
1 parent 6b709a6 commit adadf11
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 21 deletions.
2 changes: 0 additions & 2 deletions Common/ArmEmitter.cpp
Expand Up @@ -2577,8 +2577,6 @@ void ARMXEmitter::VREV16(u32 Size, ARMReg Vd, ARMReg Vm)
VREVX(2, Size, Vd, Vm);
}

// UNTESTED

// See page A8-878 in ARMv7-A Architecture Reference Manual

// Dest is a Q register, Src is a D register.
Expand Down
4 changes: 2 additions & 2 deletions Core/MIPS/ARM/ArmCompVFPU.cpp
Expand Up @@ -1046,8 +1046,8 @@ namespace MIPSComp
VMOV(tmp[i], fpr.V(sregs[i]));
}

// This always converts four 32-bit floats in Q0 to four 16-bit floats
// in D0. If we are dealing with a pair here, we just ignore the upper two outputs.
// This always converts four 16-bit floats in D0 to four 32-bit floats
// in Q0. If we are dealing with a pair here, we just ignore the upper two outputs.
// There are also a couple of other instructions that do it one at a time but doesn't
// seem worth the trouble.
VCVTF32F16(Q0, D0);
Expand Down
109 changes: 92 additions & 17 deletions Core/MIPS/ARM/ArmRegCacheFPU.cpp
Expand Up @@ -302,6 +302,98 @@ void ArmRegCacheFPU::FlushR(MIPSReg r) {
mr[r].reg = (int)INVALID_REG;
}

void ArmRegCacheFPU::FlushAll() {
// Discard temps!
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) {
DiscardR(i);
}

#if 0
// Causes crashes and weird glitches. Really not sure what's going on as the logs look ok.
// TODO: VSTMIA requires NEON so we should check for that.

This comment has been minimized.

Copy link
@xsacha

xsacha Mar 11, 2014

Collaborator

The VSTMIA shouldn't need NEON.
Says VFPv2 here.

This comment has been minimized.

Copy link
@hrydgard

hrydgard Mar 11, 2014

Author Owner

Oh, sorry, remembered wrong.

This comment has been minimized.

Copy link
@unknownbrackets

unknownbrackets Mar 11, 2014

Collaborator

On line 338, I can't seem to comment (not sure why, on my phone), but it forgets the armreg in a non continuous case, so it never flushes the legitimately dirty reg properly. Instead it should flush there and still flush/drop the others that were in a row.

-[Unknown]

This comment has been minimized.

Copy link
@hrydgard

hrydgard Mar 11, 2014

Author Owner

Good catch, fixing that and a small dirtying issue makes it run correctly. However I can see in disassembly that it's missing many obvious opportunities for some reason... I guess I'll add some more logging.

This comment has been minimized.

Copy link
@unknownbrackets

unknownbrackets Mar 11, 2014

Collaborator

Well, the fpu regs don't need to be I'm order, the armregs do. I would go through arm first, flushing continuous, then the fpu and flush the rest.

-[Unknown]

This comment has been minimized.

Copy link
@hrydgard

hrydgard Mar 11, 2014

Author Owner

Well for a contiguous write to the MIPS register backing store, both the MIPS and ARM regs need to be contiguous, no?

Anyway, I don't like this code, might redo it in a manner similar to your implementation for the GPRs which I forgot about.

This comment has been minimized.

Copy link
@hrydgard

hrydgard Mar 11, 2014

Author Owner

Ah, the reason it's missing stuff from the VFPU registers is the non-sequentialness of the MIPS reg offsets. Need to loop through the VFPU registers in a similar staggered order.

This comment has been minimized.

Copy link
@unknownbrackets

unknownbrackets Mar 11, 2014

Collaborator

Ah right. Yeah gprs are different because you specify with flags.

-[Unknown]

This comment has been minimized.

Copy link
@hrydgard

hrydgard Mar 11, 2014

Author Owner

I rewrote the thing in a new commit, turned out to be simpler to loop over the ARM regs first indeed.

int lastARMReg = INVALID_REG;
int continuityStartARMReg = INVALID_REG;
int continuityStartMIPSReg = -1;
for (int r = 0; r < NUM_MIPSFPUREG + 1; r++) {
if (r == NUM_MIPSFPUREG)
goto drop;

if (mr[r].loc == ML_ARMREG) {
if (mr[r].reg == (int)INVALID_REG) {
ERROR_LOG(JIT, "FlushAll %i: MipsReg had bad ArmReg", r);
}
if (ar[mr[r].reg].isDirty) {
ar[mr[r].reg].isDirty = false;
if (continuityStartARMReg == INVALID_REG) {
lastARMReg = mr[r].reg;
continuityStartARMReg = mr[r].reg;
continuityStartMIPSReg = r;
ILOG("Starting continuity: A%i M%i", continuityStartARMReg, continuityStartMIPSReg);
// goto drop;
} else {
if (mr[r].reg != lastARMReg + 1) {
// Continuity mismatch - drop.
ar[mr[r].reg].mipsReg = -1;
mr[r].loc = ML_MEM;
mr[r].reg = (int)INVALID_REG;
goto drop;
}
lastARMReg = mr[r].reg;
}
} else {
// Continuity dirty mismatch - drop.
ar[mr[r].reg].mipsReg = -1;
mr[r].loc = ML_MEM;
mr[r].reg = (int)INVALID_REG;
goto drop;
}
ar[mr[r].reg].mipsReg = -1;
mr[r].loc = ML_MEM;
mr[r].reg = (int)INVALID_REG;
continue;
} else {
mr[r].loc = ML_MEM;
mr[r].reg = (int)INVALID_REG;
goto drop;
}

continue;

drop:
// Continuity ended. See if we have anything to flush.
if (lastARMReg != INVALID_REG) {
ILOG("Ending continuity at A%i (start: A%i)", lastARMReg, continuityStartARMReg);
if (lastARMReg == continuityStartARMReg) {
// Single one. Just do a VSTR.
ILOG("Writing single ARM reg: A%i (M%i)", continuityStartARMReg, continuityStartMIPSReg);
emit_->VSTR((ARMReg)(continuityStartARMReg + S0), CTXREG, GetMipsRegOffset(continuityStartMIPSReg));
} else {
ILOG("Writing multiple ARM regs : A%i - A%i M%i", continuityStartARMReg, lastARMReg, continuityStartMIPSReg);
// VSTMIA!
emit_->ADDI2R(R0, CTXREG, GetMipsRegOffset(continuityStartMIPSReg), R1);
int count = lastARMReg - continuityStartARMReg + 1;
ILOG("VSTMIA R0, %i, %i", continuityStartARMReg, count);
emit_->VSTMIA(R0, false, (ARMReg)(S0 + continuityStartARMReg), count);
}
}

lastARMReg = INVALID_REG;
continuityStartARMReg = INVALID_REG;
}
#else
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
FlushR(i);
}
#endif

// Sanity check
for (int i = 0; i < numARMFpuReg_; i++) {
if (ar[i].mipsReg != -1) {
ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
}

void ArmRegCacheFPU::DiscardR(MIPSReg r) {
switch (mr[r].loc) {
case ML_IMM:
Expand Down Expand Up @@ -351,23 +443,6 @@ int ArmRegCacheFPU::GetTempR() {
return -1;
}


void ArmRegCacheFPU::FlushAll() {
// Discard temps!
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) {
DiscardR(i);
}
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
FlushR(i);
}
// Sanity check
for (int i = 0; i < numARMFpuReg_; i++) {
if (ar[i].mipsReg != -1) {
ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
}

int ArmRegCacheFPU::GetMipsRegOffset(MIPSReg r) {
// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs", then the VFPU ctrls.
if (r < 0 || r > 32 + 128 + NUM_TEMPS) {
Expand Down

0 comments on commit adadf11

Please sign in to comment.