Skip to content

Commit

Permalink
Merge pull request #5259 from MerryMage/quantload
Browse files Browse the repository at this point in the history
Jit64: Make psq_lXX PIE-compliant
  • Loading branch information
degasus committed Apr 15, 2017
2 parents fb805e3 + cac7752 commit 8d4be36
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 20 deletions.
1 change: 1 addition & 0 deletions Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ void Jit64AsmRoutineManager::GenerateCommon()
GenMfcr();

GenQuantizedLoads();
GenQuantizedSingleLoads();
GenQuantizedStores();
GenQuantizedSingleStores();

Expand Down
12 changes: 7 additions & 5 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,16 +148,18 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
}
else
{
MOV(32, R(RSCRATCH2), Imm32(0x3F07));

// Get the high part of the GQR register
OpArg gqr = PPCSTATE(spr[SPR_GQR0 + i]);
gqr.AddMemOffset(2);

MOV(32, R(RSCRATCH2), Imm32(0x3F07));
AND(32, R(RSCRATCH2), gqr);
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));

CALLptr(MScaled(RSCRATCH, SCALE_8, PtrOffset(&asm_routines.pairedLoadQuantized[w * 8])));
LEA(64, RSCRATCH, M(w ? asm_routines.singleLoadQuantized : asm_routines.pairedLoadQuantized));
// 8-bit operations do not zero the upper bits of 64-bit registers, so the table address
// loaded by the LEA above is preserved. The tables are 256-byte aligned, so RSCRATCH's
// least significant byte is known to be zero here.
OR(8, R(RSCRATCH), R(RSCRATCH2));
SHL(8, R(RSCRATCH), Imm8(3));
CALLptr(MatR(RSCRATCH));
}

CVTPS2PD(fpr.RX(s), R(XMM0));
Expand Down
15 changes: 12 additions & 3 deletions Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,13 +277,22 @@ const u8* CommonAsmRoutines::GenQuantizedStoreRuntime(bool single, EQuantizeType

void CommonAsmRoutines::GenQuantizedLoads()
{
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(16 * sizeof(u8*));
// Aligned to 256 bytes because the least significant byte of the table's address
// needs to be zero (see Jit64::psq_lXX).
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCodeTo(256)));
ReserveCodeSpace(8 * sizeof(u8*));

for (int type = 0; type < 8; type++)
pairedLoadQuantized[type] = GenQuantizedLoadRuntime(false, static_cast<EQuantizeType>(type));
}

void CommonAsmRoutines::GenQuantizedSingleLoads()
{
// Aligned to 256 bytes because the least significant byte of the table's address
// needs to be zero (see Jit64::psq_lXX).
singleLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCodeTo(256)));
ReserveCodeSpace(8 * sizeof(u8*));

for (int type = 0; type < 8; type++)
pairedLoadQuantized[type + 8] = GenQuantizedLoadRuntime(true, static_cast<EQuantizeType>(type));
singleLoadQuantized[type] = GenQuantizedLoadRuntime(true, static_cast<EQuantizeType>(type));
}

const u8* CommonAsmRoutines::GenQuantizedLoadRuntime(bool single, EQuantizeType type)
Expand Down
1 change: 1 addition & 0 deletions Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class CommonAsmRoutines : public CommonAsmRoutinesBase, public QuantizedMemoryRo
const u8* GenQuantizedLoadRuntime(bool single, EQuantizeType type);
const u8* GenQuantizedStoreRuntime(bool single, EQuantizeType type);
void GenQuantizedLoads();
void GenQuantizedSingleLoads();
void GenQuantizedStores();
void GenQuantizedSingleStores();
};
6 changes: 4 additions & 2 deletions Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1616,10 +1616,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->MOV(32, R(RSCRATCH2), Imm32(0x3F07));
Jit->AND(32, R(RSCRATCH2), M(((char*)&GQR(quantreg)) + 2));
Jit->MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
Jit->OR(32, R(RSCRATCH), Imm8(w << 3));

const u8** table =
w ? Jit->asm_routines.singleLoadQuantized : Jit->asm_routines.pairedLoadQuantized;

Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp1(I)));
Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedLoadQuantized)));
Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)table));
Jit->MOVAPD(reg, R(XMM0));
RI.fregs[reg] = I;
regNormalRegClear(RI, I);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
UBFM(type_reg, scale_reg, 16, 18); // Type
UBFM(scale_reg, scale_reg, 24, 29); // Scale

MOVP2R(X30, &pairedLoadQuantized[inst.W * 8]);
MOVP2R(X30, inst.W ? singleLoadQuantized : pairedLoadQuantized);
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(X30);

Expand Down
21 changes: 12 additions & 9 deletions Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ void JitArm64::GenerateCommonAsm()
JitRegister::Register(start, GetCodePtr(), "JIT_QuantizedLoad");

pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(16 * sizeof(u8*));
ReserveCodeSpace(8 * sizeof(u8*));

pairedLoadQuantized[0] = loadPairedFloatTwo;
pairedLoadQuantized[1] = loadPairedIllegal;
Expand All @@ -347,14 +347,17 @@ void JitArm64::GenerateCommonAsm()
pairedLoadQuantized[6] = loadPairedS8Two;
pairedLoadQuantized[7] = loadPairedS16Two;

pairedLoadQuantized[8] = loadPairedFloatOne;
pairedLoadQuantized[9] = loadPairedIllegal;
pairedLoadQuantized[10] = loadPairedIllegal;
pairedLoadQuantized[11] = loadPairedIllegal;
pairedLoadQuantized[12] = loadPairedU8One;
pairedLoadQuantized[13] = loadPairedU16One;
pairedLoadQuantized[14] = loadPairedS8One;
pairedLoadQuantized[15] = loadPairedS16One;
singleLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(8 * sizeof(u8*));

singleLoadQuantized[0] = loadPairedFloatOne;
singleLoadQuantized[1] = loadPairedIllegal;
singleLoadQuantized[2] = loadPairedIllegal;
singleLoadQuantized[3] = loadPairedIllegal;
singleLoadQuantized[4] = loadPairedU8One;
singleLoadQuantized[5] = loadPairedU16One;
singleLoadQuantized[6] = loadPairedS8One;
singleLoadQuantized[7] = loadPairedS16One;

// Stores
start = GetCodePtr();
Expand Down
6 changes: 6 additions & 0 deletions Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ class CommonAsmRoutinesBase
// Trashes: all three RSCRATCH
const u8** pairedLoadQuantized;

// In: array index: GQR to use.
// In: ECX: Address to read from.
// Out: XMM0: Bottom 32-bit slot holds the read value.
// Trashes: all three RSCRATCH
const u8** singleLoadQuantized;

// In: array index: GQR to use.
// In: ECX: Address to write to.
// In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.
Expand Down

0 comments on commit 8d4be36

Please sign in to comment.