Merge pull request #17566 from hrydgard/vertex-decoder-color-opt
x86-64 vertex decoder: Keep the vertex-full-alpha variable in a register
hrydgard committed Jun 12, 2023
2 parents 94e3412 + d957f6b commit 17a723e
Showing 2 changed files with 44 additions and 2 deletions.
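
For context, the core of the change: instead of storing to gstate_c.vertexFullAlpha from inside every per-vertex color step, the x86-64 path now keeps an "all vertices opaque" flag in a scratch register (alphaReg, R11) for the whole decode loop and writes memory at most once after it. A minimal standalone sketch of that pattern in plain C++; only gstate_c.vertexFullAlpha is a real name from the source, DecodeColors and the rest are illustrative:

#include <cstdint>
#include <cstddef>

static uint8_t vertexFullAlpha = 1;      // stands in for gstate_c.vertexFullAlpha

void DecodeColors(const uint32_t *colors, size_t count) {
	uint32_t fullAlpha = 1;              // alphaReg is preset to 1 before the loop
	for (size_t i = 0; i < count; i++) {
		// Same test the JIT emits: compare against 0xFF000000, clear if below.
		if (colors[i] < 0xFF000000)
			fullAlpha = 0;               // XOR alphaReg, alphaReg
	}
	if (fullAlpha != 1)                  // CMP alphaReg, 1; skip the store if equal
		vertexFullAlpha = 0;             // single conditional writeback after the loop
}

Previously the clear (or, for 5551 colors, an AND) hit gstate_c.vertexFullAlpha directly inside every color step.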
GPU/Common/ReplacedTexture.h (1 addition, 1 deletion)
@@ -128,7 +128,7 @@ class ReplacedTexture {

void GetSize(int level, int *w, int *h) const {
_dbg_assert_(State() == ReplacementState::ACTIVE);
-_dbg_assert_(level < levels_.size());
+_dbg_assert_((size_t)level < levels_.size());
*w = levels_[level].fullW;
*h = levels_[level].fullH;
}
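
(The ReplacedTexture.h hunk is a small side fix: level is an int while levels_.size() returns a size_t, so the added cast keeps the assert from comparing signed against unsigned.)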
GPU/Common/VertexDecoderX86.cpp (43 additions, 1 deletion)
@@ -60,13 +60,15 @@ static const X64Reg tempReg3 = R10;
static const X64Reg srcReg = RCX;
static const X64Reg dstReg = RDX;
static const X64Reg counterReg = R8;
static const X64Reg alphaReg = R11;
#else
static const X64Reg tempReg1 = RAX;
static const X64Reg tempReg2 = R9;
static const X64Reg tempReg3 = R10;
static const X64Reg srcReg = RDI;
static const X64Reg dstReg = RSI;
static const X64Reg counterReg = RDX;
static const X64Reg alphaReg = R11;
#endif
#else
static const X64Reg tempReg1 = EAX;
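
R11 is a natural choice for alphaReg: it is a caller-saved scratch register in both the Windows x64 and System V AMD64 calling conventions and is not used for argument passing, so the flag can live there across the whole decode loop without any save/restore beyond the initialization and writeback added below.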
@@ -201,6 +203,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
MOVUPS(MDisp(ESP, 80), XMM9);
#endif

// Initialize alpha reg.
#if PPSSPP_ARCH(AMD64)
if (dec.col) {
MOV(32, R(alphaReg), Imm32(1));
}
#endif

bool prescaleStep = false;
// Look for prescaled texcoord steps
for (int i = 0; i < dec.numSteps_; i++) {
@@ -243,6 +252,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
}

// Keep the scale/offset in a few fp registers if we need it.
// TODO: Read it from an argument pointer instead of gstate_c.uv.
if (prescaleStep) {
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.uv));
MOVUPS(fpScaleOffsetReg, MatR(tempReg1));
@@ -271,6 +281,21 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
SUB(32, R(counterReg), Imm8(1));
J_CC(CC_NZ, loopStart, true);

// Writeback alpha reg
#if PPSSPP_ARCH(AMD64)
if (dec.col) {
CMP(32, R(alphaReg), Imm32(1));
FixupBranch alphaJump = J_CC(CC_E, false);
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
MOV(8, MatR(tempReg1), Imm8(0));
}
SetJumpTarget(alphaJump);
}
#endif

MOVUPS(XMM4, MDisp(ESP, 0));
MOVUPS(XMM5, MDisp(ESP, 16));
MOVUPS(XMM6, MDisp(ESP, 32));
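
Note that the writeback only has to handle clearing: the per-vertex color paths below never set alphaReg back to 1, they only zero it or AND a byte into it, so comparing against the initial value of 1 and storing a single 0 to gstate_c.vertexFullAlpha when they differ gives the same final memory state as the old per-vertex stores.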
@@ -930,12 +955,17 @@ void VertexDecoderJitCache::Jit_Color8888() {

CMP(32, R(tempReg1), Imm32(0xFF000000));
FixupBranch skip = J_CC(CC_AE, false);
#if PPSSPP_ARCH(AMD64)
// Would like to use CMOV or SetCC but CMOV doesn't take immediates and SetCC isn't right. So...
XOR(32, R(alphaReg), R(alphaReg));
#else
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
MOV(8, MatR(tempReg1), Imm8(0));
}
#endif
SetJumpTarget(skip);
}
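
On the "CMOV or SetCC" comment in Jit_Color8888 above: CMOVcc cannot take an immediate source, and SETcc recomputes its destination from the flags on every vertex, so an opaque vertex later in the run would flip the register back to 1 and lose an earlier clear; presumably that is why the clear stays a conditional jump over a plain XOR. A tiny standalone demonstration of the difference, with made-up color values:

#include <cstdint>
#include <cstdio>

int main() {
	// A translucent vertex followed by an opaque one must still leave the flag 0.
	const uint32_t colors[] = { 0x80FF0000, 0xFF00FF00 };

	uint32_t sticky = 1, setcc = 1;
	for (uint32_t c : colors) {
		if (c < 0xFF000000)
			sticky = 0;                     // what the emitted jump + XOR does
		setcc = (c >= 0xFF000000) ? 1 : 0;  // what a per-vertex SETAE would compute
	}
	printf("sticky=%u setcc=%u\n", (unsigned)sticky, (unsigned)setcc);  // sticky=0 setcc=1
	return 0;
}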

@@ -965,12 +995,16 @@ void VertexDecoderJitCache::Jit_Color4444() {

CMP(32, R(tempReg1), Imm32(0xFF000000));
FixupBranch skip = J_CC(CC_AE, false);
#if PPSSPP_ARCH(AMD64)
XOR(32, R(alphaReg), R(alphaReg));
#else
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
MOV(8, MatR(tempReg1), Imm8(0));
}
#endif
SetJumpTarget(skip);
}

@@ -1042,14 +1076,18 @@ void VertexDecoderJitCache::Jit_Color5551() {

MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg2));

-// Let's AND to avoid a branch, tempReg1 has alpha only in the top 8 bits.
+// Let's AND to avoid a branch, tempReg1 has alpha only in the top 8 bits, and they're all equal.
SHR(32, R(tempReg1), Imm8(24));
#if PPSSPP_ARCH(AMD64)
AND(8, R(alphaReg), R(tempReg1));
#else
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
AND(8, M(&gstate_c.vertexFullAlpha), R(tempReg1)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg3), ImmPtr(&gstate_c.vertexFullAlpha));
AND(8, MatR(tempReg3), R(tempReg1));
}
#endif
}

void VertexDecoderJitCache::Jit_Color8888Morph() {
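
The AND in Jit_Color5551 above is branch-free because, as the updated comment notes, the converted color's top 8 bits are uniform: 0xFF when the alpha bit was set, 0x00 otherwise. After the SHR by 24 that byte is ANDed into alphaReg, which starts at 1, so the flag stays 1 only while every vertex has had full alpha and can never turn back into 1 once cleared. A small standalone check of that arithmetic, with made-up converted values:

#include <cstdint>
#include <cstdio>

int main() {
	uint32_t alphaFlag = 1;                                    // alphaReg starts at 1
	const uint32_t converted[] = { 0xFFFFFFFF, 0x00FFFFFF };   // opaque, then alpha bit clear
	for (uint32_t c : converted) {
		uint32_t top = c >> 24;  // SHR(32, tempReg1, Imm8(24)): 0xFF or 0x00
		alphaFlag &= top;        // AND(8, alphaReg, tempReg1)
	}
	printf("%u\n", (unsigned)alphaFlag);  // prints 0: not every vertex was opaque
	return 0;
}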
@@ -1258,12 +1296,16 @@ void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff, bool checkAlpha) {
if (checkAlpha) {
CMP(32, R(tempReg1), Imm32(0xFF000000));
FixupBranch skip = J_CC(CC_AE, false);
#if PPSSPP_ARCH(AMD64)
XOR(32, R(alphaReg), R(alphaReg));
#else
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg2), ImmPtr(&gstate_c.vertexFullAlpha));
MOV(8, MatR(tempReg2), Imm8(0));
}
#endif
SetJumpTarget(skip);
} else {
// Force alpha to full if we're not checking it.