diff --git a/GPU/Common/ReplacedTexture.h b/GPU/Common/ReplacedTexture.h
index 4273ffead53a..7474bae11523 100644
--- a/GPU/Common/ReplacedTexture.h
+++ b/GPU/Common/ReplacedTexture.h
@@ -128,7 +128,7 @@ class ReplacedTexture {

 	void GetSize(int level, int *w, int *h) const {
 		_dbg_assert_(State() == ReplacementState::ACTIVE);
-		_dbg_assert_(level < levels_.size());
+		_dbg_assert_((size_t)level < levels_.size());
 		*w = levels_[level].fullW;
 		*h = levels_[level].fullH;
 	}
diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index 3340961f8ea6..5828689ea3a6 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -60,6 +60,7 @@ static const X64Reg tempReg3 = R10;
 static const X64Reg srcReg = RCX;
 static const X64Reg dstReg = RDX;
 static const X64Reg counterReg = R8;
+static const X64Reg alphaReg = R11;
 #else
 static const X64Reg tempReg1 = RAX;
 static const X64Reg tempReg2 = R9;
@@ -67,6 +68,7 @@ static const X64Reg tempReg3 = R10;
 static const X64Reg srcReg = RDI;
 static const X64Reg dstReg = RSI;
 static const X64Reg counterReg = RDX;
+static const X64Reg alphaReg = R11;
 #endif
 #else
 static const X64Reg tempReg1 = EAX;
@@ -201,6 +203,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(MDisp(ESP, 80), XMM9);
 #endif

+	// Initialize alpha reg.
+#if PPSSPP_ARCH(AMD64)
+	if (dec.col) {
+		MOV(32, R(alphaReg), Imm32(1));
+	}
+#endif
+
 	bool prescaleStep = false;
 	// Look for prescaled texcoord steps
 	for (int i = 0; i < dec.numSteps_; i++) {
@@ -243,6 +252,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	}

 	// Keep the scale/offset in a few fp registers if we need it.
+	// TODO: Read it from an argument pointer instead of gstate_c.uv.
 	if (prescaleStep) {
 		MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.uv));
 		MOVUPS(fpScaleOffsetReg, MatR(tempReg1));
@@ -271,6 +281,21 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	SUB(32, R(counterReg), Imm8(1));
 	J_CC(CC_NZ, loopStart, true);

+	// Writeback alpha reg
+#if PPSSPP_ARCH(AMD64)
+	if (dec.col) {
+		CMP(32, R(alphaReg), Imm32(1));
+		FixupBranch alphaJump = J_CC(CC_E, false);
+		if (RipAccessible(&gstate_c.vertexFullAlpha)) {
+			MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
+		} else {
+			MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
+			MOV(8, MatR(tempReg1), Imm8(0));
+		}
+		SetJumpTarget(alphaJump);
+	}
+#endif
+
 	MOVUPS(XMM4, MDisp(ESP, 0));
 	MOVUPS(XMM5, MDisp(ESP, 16));
 	MOVUPS(XMM6, MDisp(ESP, 32));
@@ -930,12 +955,17 @@ void VertexDecoderJitCache::Jit_Color8888() {

 	CMP(32, R(tempReg1), Imm32(0xFF000000));
 	FixupBranch skip = J_CC(CC_AE, false);
+#if PPSSPP_ARCH(AMD64)
+	// Would like to use CMOV or SetCC but CMOV doesn't take immediates and SetCC isn't right. So...
+	XOR(32, R(alphaReg), R(alphaReg));
+#else
 	if (RipAccessible(&gstate_c.vertexFullAlpha)) {
 		MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
 	} else {
 		MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
 		MOV(8, MatR(tempReg1), Imm8(0));
 	}
+#endif
 	SetJumpTarget(skip);
 }

@@ -965,12 +995,16 @@ void VertexDecoderJitCache::Jit_Color4444() {

 	CMP(32, R(tempReg1), Imm32(0xFF000000));
 	FixupBranch skip = J_CC(CC_AE, false);
+#if PPSSPP_ARCH(AMD64)
+	XOR(32, R(alphaReg), R(alphaReg));
+#else
 	if (RipAccessible(&gstate_c.vertexFullAlpha)) {
 		MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
 	} else {
 		MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
 		MOV(8, MatR(tempReg1), Imm8(0));
 	}
+#endif
 	SetJumpTarget(skip);
 }

@@ -1042,14 +1076,18 @@ void VertexDecoderJitCache::Jit_Color5551() {
 	MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg2));

-	// Let's AND to avoid a branch, tempReg1 has alpha only in the top 8 bits.
+	// Let's AND to avoid a branch, tempReg1 has alpha only in the top 8 bits, and they're all equal.
 	SHR(32, R(tempReg1), Imm8(24));
+#if PPSSPP_ARCH(AMD64)
+	AND(8, R(alphaReg), R(tempReg1));
+#else
 	if (RipAccessible(&gstate_c.vertexFullAlpha)) {
 		AND(8, M(&gstate_c.vertexFullAlpha), R(tempReg1)); // rip accessible
 	} else {
 		MOV(PTRBITS, R(tempReg3), ImmPtr(&gstate_c.vertexFullAlpha));
 		AND(8, MatR(tempReg3), R(tempReg1));
 	}
+#endif
 }

 void VertexDecoderJitCache::Jit_Color8888Morph() {
@@ -1258,12 +1296,16 @@ void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff, bool checkAlpha) {
 	if (checkAlpha) {
 		CMP(32, R(tempReg1), Imm32(0xFF000000));
 		FixupBranch skip = J_CC(CC_AE, false);
+#if PPSSPP_ARCH(AMD64)
+		XOR(32, R(alphaReg), R(alphaReg));
+#else
 		if (RipAccessible(&gstate_c.vertexFullAlpha)) {
 			MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
 		} else {
 			MOV(PTRBITS, R(tempReg2), ImmPtr(&gstate_c.vertexFullAlpha));
 			MOV(8, MatR(tempReg2), Imm8(0));
 		}
+#endif
 		SetJumpTarget(skip);
 	} else {
 		// Force alpha to full if we're not checking it.
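
Editor's note: below is a minimal scalar sketch of the optimization this patch performs in the x86-64 JIT; it is not PPSSPP code, and the names GState and DecodeColors8888 are hypothetical. Before the patch, each color step cleared gstate_c.vertexFullAlpha in memory from inside the per-vertex loop whenever it saw non-full alpha; after it, the flag lives in alphaReg (R11) for the duration of the loop, initialized to 1 up front and flushed to memory once after the loop ends.

// Hypothetical C++ equivalent of the register-based full-alpha tracking.
// Assumes 8888 colors with alpha in the top byte, as in Jit_Color8888.
#include <cstdint>

struct GState {
	uint8_t vertexFullAlpha = 1;
};
static GState gstate_c;

void DecodeColors8888(const uint32_t *src, uint32_t *dst, int count) {
	uint32_t fullAlpha = 1;  // mirrors MOV(32, R(alphaReg), Imm32(1))
	for (int i = 0; i < count; i++) {
		uint32_t c = src[i];
		dst[i] = c;
		// Mirrors CMP + J_CC(CC_AE) + XOR(alphaReg, alphaReg): clear the
		// flag when the top (alpha) byte is below 0xFF.
		if (c < 0xFF000000)
			fullAlpha = 0;
	}
	// Mirrors the post-loop writeback: one conditional store per decode
	// call instead of one store or read-modify-write per vertex.
	if (fullAlpha != 1)
		gstate_c.vertexFullAlpha = 0;
}

The same register also explains the branchless Jit_Color5551 path: after the SHR by 24, tempReg1 holds either 0x00 or 0xFF, so AND(8, R(alphaReg), R(tempReg1)) leaves alphaReg equal to 1 only if every vertex so far had full alpha, which is exactly what the post-loop CMP against 1 checks.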