Skip to content

Commit

Permalink
Remove some RIP-relative addressing in vertex decoder
Browse files: browse the repository at this point in the history
  • Loading branch information
hrydgard committed Jul 7, 2017
1 parent 077fafb commit 442a701
Showing 1 changed file with 48 additions and 12 deletions.
60 changes: 48 additions & 12 deletions GPU/Common/VertexDecoderX86.cpp
Expand Up @@ -478,9 +478,17 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
CVTDQ2PS(XMM8, R(XMM8));
if (dec_->nweights > 4)
CVTDQ2PS(XMM9, R(XMM9));
MULPS(XMM8, M(&by128));
if (dec_->nweights > 4)
MULPS(XMM9, M(&by128));

if (RipAccessible(&by128)) {
MULPS(XMM8, M(&by128)); // rip accessible
if (dec_->nweights > 4)
MULPS(XMM9, M(&by128)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&by128));
MULPS(XMM8, MatR(tempReg1));
if (dec_->nweights > 4)
MULPS(XMM9, MatR(tempReg1));
}

auto weightToAllLanes = [this](X64Reg dst, int lane) {
X64Reg src = lane < 4 ? XMM8 : XMM9;
Expand Down Expand Up @@ -521,7 +529,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
#else
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->weightoff + j));
CVTSI2SS(weight, R(tempReg1));
MULSS(weight, M(&by128));
MULSS(weight, M(&by128)); // rip accessible (x86)
SHUFPS(weight, R(weight), _MM_SHUFFLE(0, 0, 0, 0));
#endif
if (j == 0) {
Expand Down Expand Up @@ -584,9 +592,17 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
CVTDQ2PS(XMM8, R(XMM8));
if (dec_->nweights > 4)
CVTDQ2PS(XMM9, R(XMM9));
MULPS(XMM8, M(&by32768));
if (dec_->nweights > 4)
MULPS(XMM9, M(&by32768));

if (RipAccessible(&by32768)) {
MULPS(XMM8, M(&by32768)); // rip accessible
if (dec_->nweights > 4)
MULPS(XMM9, M(&by32768)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&by32768));
MULPS(XMM8, MatR(tempReg1));
if (dec_->nweights > 4)
MULPS(XMM9, MatR(tempReg1));
}

auto weightToAllLanes = [this](X64Reg dst, int lane) {
X64Reg src = lane < 4 ? XMM8 : XMM9;
Expand Down Expand Up @@ -627,7 +643,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
#else
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff + j * 2));
CVTSI2SS(weight, R(tempReg1));
MULSS(weight, M(&by32768));
MULSS(weight, M(&by32768)); // rip accessible (x86)
SHUFPS(weight, R(weight), _MM_SHUFFLE(0, 0, 0, 0));
#endif
if (j == 0) {
Expand Down Expand Up @@ -1364,7 +1380,12 @@ void VertexDecoderJitCache::Jit_AnyS8ToFloat(int srcoff) {
PSRAD(XMM1, 24);
}
CVTDQ2PS(XMM3, R(XMM1));
MULPS(XMM3, M(&by128));
if (RipAccessible(&by128)) {
MULPS(XMM3, M(&by128));
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&by128));
MULPS(XMM3, MatR(tempReg1));
}
}

void VertexDecoderJitCache::Jit_AnyS16ToFloat(int srcoff) {
Expand Down Expand Up @@ -1407,7 +1428,12 @@ void VertexDecoderJitCache::Jit_AnyU8ToFloat(int srcoff, u32 bits) {
PUNPCKLWD(XMM1, R(XMM3));
}
CVTDQ2PS(XMM3, R(XMM1));
MULPS(XMM3, M(&by128));
if (RipAccessible(&by128)) {
MULPS(XMM3, M(&by128));
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&by128));
MULPS(XMM3, MatR(tempReg1));
}
}

void VertexDecoderJitCache::Jit_AnyU16ToFloat(int srcoff, u32 bits) {
Expand Down Expand Up @@ -1442,7 +1468,12 @@ void VertexDecoderJitCache::Jit_AnyS8Morph(int srcoff, int dstoff) {
if (!cpu_info.bSSE4_1) {
PXOR(fpScratchReg4, R(fpScratchReg4));
}
MOVAPS(XMM5, M(by128));
if (RipAccessible(&by128)) {
MOVAPS(XMM5, M(&by128));
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&by128));
MOVAPS(XMM5, MatR(tempReg1));
}

// Sum into fpScratchReg.
bool first = true;
Expand Down Expand Up @@ -1481,7 +1512,12 @@ void VertexDecoderJitCache::Jit_AnyS16Morph(int srcoff, int dstoff) {
if (!cpu_info.bSSE4_1) {
PXOR(fpScratchReg4, R(fpScratchReg4));
}
MOVAPS(XMM5, M(by32768));
if (RipAccessible(&by32768)) {
MOVAPS(XMM5, M(&by32768));
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&by32768));
MOVAPS(XMM5, MatR(tempReg1));
}

// Sum into fpScratchReg.
bool first = true;
Expand Down

0 comments on commit 442a701

Please sign in to comment.