Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[ARM] Optimization to psq_l, no need to push/pop regs anymore. Implement support for single float loading, gives a decent speedup to Ikaruga in menus and game.
  • Loading branch information
Sonicadvance1 committed Sep 8, 2013
1 parent e5b5713 commit ba0c52b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 21 deletions.
16 changes: 4 additions & 12 deletions Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp
Expand Up @@ -28,26 +28,18 @@ void JitArm::psq_l(UGeckoInstruction inst)

if (js.memcheck) { Default(inst); return; }

if (inst.W) {
// Enable when supporting single loads
Default(inst);
return;
}

LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
//UBFX(R12, R11, 2, 6); // Scale
UBFX(R11, R11, 13, 3); // Type
UBFX(R12, R11, 13, 3); // Type
UBFX(R11, R11, 2, 6); // Scale

MOVI2R(R10, (u32)offset);
if (inst.RA)
ADD(R10, R10, gpr.R(inst.RA));
if (update)
MOV(gpr.R(inst.RA), R10);
if (inst.W)
ADD(R11, R11, 8);
MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized);
ADD(R14, R14, R11);
LDR(R14, R14);
ADD(R14, R14, R12);
LDR(R14, R14, inst.W ? 8 * 4 : 0);

// Values returned in S0, S1
BL(R14); // Jump to the quantizer Load
Expand Down
30 changes: 21 additions & 9 deletions Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp
Expand Up @@ -145,25 +145,37 @@ void JitArmAsmRoutineManager::Generate()

void JitArmAsmRoutineManager::GenerateCommon()
{
// R14 is LR
// R12 is scratch
// R11 is scale
// R10 is the address
Operand2 mask(3, 1); // ~(Memory::MEMVIEW32_MASK)
NEONXEmitter nemit(this);

const u8* loadPairedIllegal = GetCodePtr();
BKPT(0x10);

const u8* loadPairedFloatTwo = GetCodePtr();
PUSH(2, R12, _LR);
// R12, R14 is scratch
// R10 is the address
Operand2 mask(3, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(R10, R10, mask);
MOVI2R(R14, (u32)Memory::base);
ADD(R10, R10, R14);
MOVI2R(R12, (u32)Memory::base);
ADD(R10, R10, R12);

NEONXEmitter nemit(this);
nemit.VLD1(I_32, D0, R10);
nemit.VREV32(I_8, D0, D0);

MOV(_PC, _LR);

POP(2, R12, _PC);
const u8* loadPairedFloatOne = GetCodePtr();
BKPT(0x12);
BIC(R10, R10, mask);
MOVI2R(R12, (u32)Memory::base);
ADD(R10, R10, R12);

nemit.VLD1(I_32, D0, R10);
nemit.VREV32(I_8, D0, D0);

MOVI2F(S1, 1.0f, INVALID_REG); // Temp reg isn't used for 1.0f
MOV(_PC, _LR);

const u8* loadPairedU8Two = GetCodePtr();
BKPT(0x13);
const u8* loadPairedU8One = GetCodePtr();
Expand Down

0 comments on commit ba0c52b

Please sign in to comment.