Skip to content

Commit

Permalink
Merge pull request #5710 from hrydgard/avoid-alpha-test
Browse files Browse the repository at this point in the history
Avoid alpha test when vertexFullAlpha && textureFullAlpha
  • Loading branch information
hrydgard committed Mar 24, 2014
2 parents 382db79 + dc07d34 commit ff498ed
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 26 deletions.
9 changes: 9 additions & 0 deletions GPU/GLES/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,20 @@ static bool IsAlphaTestTriviallyTrue() {
return true;

case GE_COMP_GEQUAL:
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
return true; // If alpha is full, it doesn't matter what the ref value is.
return gstate.getAlphaTestRef() == 0;

// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
// Speeds up Lumines by a LOT on PowerVR.
case GE_COMP_NOTEQUAL:
if ((gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed())) && gstate.getAlphaTestRef() == 255) {
// Likely to be rare. Let's just have the alpha test take care of this instead of adding
// complicated code to discard the draw or whatnot.
return false;
}
// Fallthrough on purpose

case GE_COMP_GREATER:
{
#if 0
Expand Down
25 changes: 20 additions & 5 deletions GPU/GLES/TransformPipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,11 +265,6 @@ void TransformDrawEngine::SetupVertexDecoder(u32 vertType) {
if (vertTypeID != lastVType_) {
dec_ = GetVertexDecoder(vertTypeID);
lastVType_ = vertTypeID;

// TODO: Add functionality to VertexDecoder to scan for non-full alpha in the two other formats,
// which are quite common.
int colorType = vertTypeID & GE_VTYPE_COL_MASK;
gstate_c.vertexFullAlpha = colorType == GE_VTYPE_COL_NONE || colorType == GE_VTYPE_COL_565;
}
}

Expand Down Expand Up @@ -566,6 +561,8 @@ void TransformDrawEngine::DoFlush() {
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0;

goto rotateVBO;
}

Expand Down Expand Up @@ -645,6 +642,8 @@ void TransformDrawEngine::DoFlush() {
vertexCount = vai->numVerts;
maxIndex = vai->maxIndex;
prim = static_cast<GEPrimitiveType>(vai->prim);

gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA;
break;
}

Expand All @@ -665,6 +664,8 @@ void TransformDrawEngine::DoFlush() {
vertexCount = vai->numVerts;
maxIndex = vai->maxIndex;
prim = static_cast<GEPrimitiveType>(vai->prim);

gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA;
break;
}

Expand Down Expand Up @@ -698,6 +699,12 @@ void TransformDrawEngine::DoFlush() {
}

VERBOSE_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
} else {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}

LinkedShader *program = shaderManager_->ApplyFragmentShader(vshader, prim, lastVType_);
SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), vbo ? 0 : decoded);
Expand All @@ -717,6 +724,13 @@ void TransformDrawEngine::DoFlush() {
glBindBuffer(GL_ARRAY_BUFFER, 0);
} else {
DecodeVerts();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
} else {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}

LinkedShader *program = shaderManager_->ApplyFragmentShader(vshader, prim, lastVType_);
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
prim = indexGen.Prim();
Expand All @@ -737,6 +751,7 @@ void TransformDrawEngine::DoFlush() {
decodeCounter_ = 0;
dcid_ = 0;
prevPrim_ = GE_PRIM_INVALID;
gstate_c.vertexFullAlpha = true;

#ifndef MOBILE_DEVICE
host->GPUNotifyDraw();
Expand Down
6 changes: 6 additions & 0 deletions GPU/GLES/TransformPipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ struct DecVtxFormat;
// DRAWN_ONCE -> death
// DRAWN_RELIABLE -> death

// Bit flags kept in VertexArrayInfo::flags.
enum {
	// Recorded by DoFlush from gstate_c.vertexFullAlpha when it fills in a
	// VertexArrayInfo, and used to restore that state when the entry is reused.
	VAI_FLAG_VERTEXFULLALPHA = 1 << 0,
};

// Try to keep this POD.
class VertexArrayInfo {
public:
Expand All @@ -57,6 +61,7 @@ class VertexArrayInfo {
lastFrame = gpuStats.numFlips;
numVerts = 0;
drawsUntilNextFullHash = 0;
flags = 0;
}
~VertexArrayInfo();

Expand Down Expand Up @@ -85,6 +90,7 @@ class VertexArrayInfo {
int numFrames;
int lastFrame; // So that we can forget.
u16 drawsUntilNextFullHash;
u8 flags;
};

// Handles transform, lighting and drawing.
Expand Down
9 changes: 9 additions & 0 deletions GPU/GLES/VertexDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ void VertexDecoder::Step_Color565() const
c[1] = Convert6To8((cdata>>5) & 0x3f);
c[2] = Convert5To8((cdata>>11) & 0x1f);
c[3] = 255;
// Always full alpha.
}

void VertexDecoder::Step_Color5551() const
Expand All @@ -229,6 +230,7 @@ void VertexDecoder::Step_Color5551() const
c[1] = Convert5To8((cdata>>5) & 0x1f);
c[2] = Convert5To8((cdata>>10) & 0x1f);
c[3] = (cdata >> 15) ? 255 : 0;
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] != 0;
}

void VertexDecoder::Step_Color4444() const
Expand All @@ -237,13 +239,15 @@ void VertexDecoder::Step_Color4444() const
u16 cdata = *(u16*)(ptr_ + coloff);
for (int j = 0; j < 4; j++)
c[j] = Convert4To8((cdata >> (j * 4)) & 0xF);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_Color8888() const
{
u8 *c = decoded_ + decFmt.c0off;
const u8 *cdata = (const u8*)(ptr_ + coloff);
memcpy(c, cdata, sizeof(u8) * 4);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_Color565Morph() const
Expand All @@ -262,6 +266,7 @@ void VertexDecoder::Step_Color565Morph() const
c[i] = (u8)col[i];
}
c[3] = 255;
// Always full alpha.
}

void VertexDecoder::Step_Color5551Morph() const
Expand All @@ -280,6 +285,7 @@ void VertexDecoder::Step_Color5551Morph() const
for (int i = 0; i < 4; i++) {
c[i] = (u8)col[i];
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_Color4444Morph() const
Expand All @@ -296,6 +302,7 @@ void VertexDecoder::Step_Color4444Morph() const
for (int i = 0; i < 4; i++) {
c[i] = (u8)col[i];
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_Color8888Morph() const
Expand All @@ -312,6 +319,7 @@ void VertexDecoder::Step_Color8888Morph() const
for (int i = 0; i < 4; i++) {
c[i] = (u8)(col[i]);
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_NormalS8() const
Expand Down Expand Up @@ -841,6 +849,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowe
jitted_(ptr_, decoded_, count);
} else {
// Interpret the decode steps
// TODO: Init gstate_c.vertexFullAlpha here? Or in Setup? When is it reset?
for (; count; count--) {
for (int i = 0; i < numSteps_; i++) {
((*this).*steps_[i])();
Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/VertexDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,6 @@ class VertexDecoderJitCache : public Gen::XCodeBlock {
bool CompileStep(const VertexDecoder &dec, int i);
void Jit_ApplyWeights();
void Jit_WriteMatrixMul(int outOff, bool pos);
void Jit_WriteMorphColor(int outOff);
void Jit_WriteMorphColor(int outOff, bool checkAlpha = true);
const VertexDecoder *dec_;
};
53 changes: 47 additions & 6 deletions GPU/GLES/VertexDecoderArm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ static const ARMReg tempReg2 = R4;
static const ARMReg tempReg3 = R5;
static const ARMReg scratchReg = R6;
static const ARMReg scratchReg2 = R7;
static const ARMReg scratchReg3 = R12;
static const ARMReg scratchReg3 = R8;
static const ARMReg fullAlphaReg = R12;
static const ARMReg srcReg = R0;
static const ARMReg dstReg = R1;
static const ARMReg counterReg = R2;
Expand Down Expand Up @@ -262,6 +263,11 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
// TODO: Preload scale factors
}

if (dec.col) {
// Or LDB and skip the conditional? This is probably cheaper.
MOV(fullAlphaReg, 0xFF);
}

JumpTarget loopStart = GetCodePtr();
// Preload data cache ahead of reading. This offset seems pretty good.
PLD(srcReg, 64);
Expand All @@ -281,6 +287,14 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
SUBS(counterReg, counterReg, 1);
B_CC(CC_NEQ, loopStart);

if (dec.col) {
	// Publish the per-vertex alpha result gathered by the jitted color steps.
	// fullAlphaReg is initialized to 0xFF before the loop and zeroed by a color step
	// as soon as one vertex has alpha != 0xFF, so a store is only needed when it ended
	// up zero. The destination must be vertexFullAlpha - the same flag the interpreted
	// Step_Color* functions maintain and DoFlush reads - not textureFullAlpha, which
	// would leave vertexFullAlpha stale and clobber the texture state.
	MOVP2R(tempReg1, &gstate_c.vertexFullAlpha);
	CMP(fullAlphaReg, 0);
	SetCC(CC_EQ);
	STRB(fullAlphaReg, tempReg1, 0);
	SetCC(CC_AL);
}

if (NEONSkinning || NEONMorphing) {
VPOP(D8, 8);
}
Expand Down Expand Up @@ -664,7 +678,12 @@ void VertexDecoderJitCache::Jit_TcFloatPrescale() {

// Emits the code for an 8888 vertex color: the color word is copied unmodified from
// the source vertex to the decoded vertex, and fullAlphaReg is zeroed when the flag
// test below reports that alpha != 0xFF.
void VertexDecoderJitCache::Jit_Color8888() {
// Load the color word from the source vertex at the decoder's color offset.
LDR(tempReg1, srcReg, dec_->coloff);
// Set flags to determine if alpha != 0xFF.
// The arithmetic shift by 24 leaves only the top (alpha) byte, sign-extended.
// NOTE(review): presumably MVNS then yields zero only for alpha == 0xFF - confirm
// against the emitter. tempReg2 is just a destination; it is not read in this function.
MVNS(tempReg2, Operand2(tempReg1, ST_ASR, 24));
// Write the color word to the decoded vertex's c0 slot. This sits between the flag
// test and its use (NOTE(review): assumes the emitted STR leaves the flags intact).
STR(tempReg1, dstReg, dec_->decFmt.c0off);
// Emit the next instruction under the NEQ condition: clear fullAlphaReg.
SetCC(CC_NEQ);
MOV(fullAlphaReg, 0);
// Switch back to the AL (always) condition for whatever is emitted next.
SetCC(CC_AL);
}

void VertexDecoderJitCache::Jit_Color4444() {
Expand All @@ -679,10 +698,16 @@ void VertexDecoderJitCache::Jit_Color4444() {
ANDI2R(tempReg3, tempReg1, 0xF000, scratchReg);
ORR(tempReg2, tempReg2, Operand2(tempReg3, ST_LSL, 12));

// And saturate.
// And expand to 8 bits.
ORR(tempReg1, tempReg2, Operand2(tempReg2, ST_LSL, 4));

STR(tempReg1, dstReg, dec_->decFmt.c0off);

// Set flags to determine if alpha != 0xFF.
MVNS(tempReg2, Operand2(tempReg1, ST_ASR, 24));
SetCC(CC_NEQ);
MOV(fullAlphaReg, 0);
SetCC(CC_AL);
}

void VertexDecoderJitCache::Jit_Color565() {
Expand All @@ -706,7 +731,7 @@ void VertexDecoderJitCache::Jit_Color565() {
ORR(tempReg3, tempReg3, Operand2(tempReg1, ST_LSR, 4));
ORR(tempReg2, tempReg2, Operand2(tempReg3, ST_LSL, 8));

// Add in full alpha.
// Add in full alpha. No need to update fullAlphaReg.
ORI2R(tempReg1, tempReg2, 0xFF000000, scratchReg);

STR(tempReg1, dstReg, dec_->decFmt.c0off);
Expand All @@ -731,8 +756,13 @@ void VertexDecoderJitCache::Jit_Color5551() {
// Now we just need alpha. Since we loaded as signed, it'll be extended.
ANDI2R(tempReg1, tempReg1, 0xFF000000, scratchReg);
ORR(tempReg2, tempReg2, tempReg1);


// Set flags to determine if alpha != 0xFF.
MVNS(tempReg3, Operand2(tempReg1, ST_ASR, 24));
STR(tempReg2, dstReg, dec_->decFmt.c0off);
SetCC(CC_NEQ);
MOV(fullAlphaReg, 0);
SetCC(CC_AL);
}

void VertexDecoderJitCache::Jit_Color8888Morph() {
Expand Down Expand Up @@ -957,7 +987,7 @@ void VertexDecoderJitCache::Jit_Color565Morph() {
} else {
VMOV(S11, tempReg3);
}
Jit_WriteMorphColor(dec_->decFmt.c0off);
Jit_WriteMorphColor(dec_->decFmt.c0off, false);
}

// First is the left shift, second is the right shift (against walls, to get the RGBA values.)
Expand Down Expand Up @@ -1045,13 +1075,16 @@ void VertexDecoderJitCache::Jit_Color5551Morph() {
}

// Expects RGBA color in S8 - S11, which is Q2.
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff, bool checkAlpha) {
if (NEONMorphing) {
ADDI2R(tempReg1, dstReg, outOff, scratchReg);
VCVT(I_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
VQMOVN(I_32 | I_UNSIGNED, neonScratchReg, neonScratchRegQ);
VQMOVN(I_16 | I_UNSIGNED, neonScratchReg, neonScratchRegQ);
VST1_lane(I_32, neonScratchReg, tempReg1, 0, true);
if (checkAlpha) {
VMOV_neon(I_32, scratchReg, neonScratchReg, 0);
}
} else {
VCVT(S8, S8, TO_INT);
VCVT(S9, S9, TO_INT);
Expand All @@ -1066,6 +1099,14 @@ void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
ORR(scratchReg, scratchReg, Operand2(tempReg3, ST_LSL, 24));
STR(scratchReg, dstReg, outOff);
}

// Set flags to determine if alpha != 0xFF.
if (checkAlpha) {
MVNS(tempReg2, Operand2(scratchReg, ST_ASR, 24));
SetCC(CC_NEQ);
MOV(fullAlphaReg, 0);
SetCC(CC_AL);
}
}

void VertexDecoderJitCache::Jit_NormalS8() {
Expand Down
Loading

0 comments on commit ff498ed

Please sign in to comment.