Skip to content
Permalink
Browse files

Merge pull request #5710 from hrydgard/avoid-alpha-test

Avoid alpha test when vertexFullAlpha && textureFullAlpha
  • Loading branch information...
hrydgard committed Mar 24, 2014
2 parents 382db79 + dc07d34 commit ff498ed63bb4894844e52300fd9d3699a14ba302
@@ -64,11 +64,20 @@ static bool IsAlphaTestTriviallyTrue() {
return true;

case GE_COMP_GEQUAL:
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
return true; // If alpha is full, it doesn't matter what the ref value is.
return gstate.getAlphaTestRef() == 0;

// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
// Speeds up Lumines by a LOT on PowerVR.
case GE_COMP_NOTEQUAL:
if ((gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed())) && gstate.getAlphaTestRef() == 255) {
// Likely to be rare. Let's just have the alpha test take care of this instead of adding
// complicated code to discard the draw or whatnot.
return false;
}
// Fallthrough on purpose

case GE_COMP_GREATER:
{
#if 0
@@ -265,11 +265,6 @@ void TransformDrawEngine::SetupVertexDecoder(u32 vertType) {
if (vertTypeID != lastVType_) {
dec_ = GetVertexDecoder(vertTypeID);
lastVType_ = vertTypeID;

// TODO: Add functionality to VertexDecoder to scan for non-full alpha in the two other formats,
// which are quite common.
int colorType = vertTypeID & GE_VTYPE_COL_MASK;
gstate_c.vertexFullAlpha = colorType == GE_VTYPE_COL_NONE || colorType == GE_VTYPE_COL_565;
}
}

@@ -566,6 +561,8 @@ void TransformDrawEngine::DoFlush() {
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0;

goto rotateVBO;
}

@@ -645,6 +642,8 @@ void TransformDrawEngine::DoFlush() {
vertexCount = vai->numVerts;
maxIndex = vai->maxIndex;
prim = static_cast<GEPrimitiveType>(vai->prim);

gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA;
break;
}

@@ -665,6 +664,8 @@ void TransformDrawEngine::DoFlush() {
vertexCount = vai->numVerts;
maxIndex = vai->maxIndex;
prim = static_cast<GEPrimitiveType>(vai->prim);

gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA;
break;
}

@@ -698,6 +699,12 @@ void TransformDrawEngine::DoFlush() {
}

VERBOSE_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
} else {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}

LinkedShader *program = shaderManager_->ApplyFragmentShader(vshader, prim, lastVType_);
SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), vbo ? 0 : decoded);
@@ -717,6 +724,13 @@ void TransformDrawEngine::DoFlush() {
glBindBuffer(GL_ARRAY_BUFFER, 0);
} else {
DecodeVerts();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
} else {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}

LinkedShader *program = shaderManager_->ApplyFragmentShader(vshader, prim, lastVType_);
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
prim = indexGen.Prim();
@@ -737,6 +751,7 @@ void TransformDrawEngine::DoFlush() {
decodeCounter_ = 0;
dcid_ = 0;
prevPrim_ = GE_PRIM_INVALID;
gstate_c.vertexFullAlpha = true;

#ifndef MOBILE_DEVICE
host->GPUNotifyDraw();
@@ -43,6 +43,10 @@ struct DecVtxFormat;
// DRAWN_ONCE -> death
// DRAWN_RELIABLE -> death

// Bit flags kept in VertexArrayInfo::flags.
enum {
	// Records gstate_c.vertexFullAlpha for a cached vertex array, so the value
	// can be restored when the cached array is drawn again.
	VAI_FLAG_VERTEXFULLALPHA = 1 << 0,
};

// Try to keep this POD.
class VertexArrayInfo {
public:
@@ -57,6 +61,7 @@ class VertexArrayInfo {
lastFrame = gpuStats.numFlips;
numVerts = 0;
drawsUntilNextFullHash = 0;
flags = 0;
}
~VertexArrayInfo();

@@ -85,6 +90,7 @@ class VertexArrayInfo {
int numFrames;
int lastFrame; // So that we can forget.
u16 drawsUntilNextFullHash;
u8 flags;
};

// Handles transform, lighting and drawing.
@@ -219,6 +219,7 @@ void VertexDecoder::Step_Color565() const
c[1] = Convert6To8((cdata>>5) & 0x3f);
c[2] = Convert5To8((cdata>>11) & 0x1f);
c[3] = 255;
// Always full alpha.
}

void VertexDecoder::Step_Color5551() const
@@ -229,6 +230,7 @@ void VertexDecoder::Step_Color5551() const
c[1] = Convert5To8((cdata>>5) & 0x1f);
c[2] = Convert5To8((cdata>>10) & 0x1f);
c[3] = (cdata >> 15) ? 255 : 0;
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] != 0;
}

void VertexDecoder::Step_Color4444() const
@@ -237,13 +239,15 @@ void VertexDecoder::Step_Color4444() const
u16 cdata = *(u16*)(ptr_ + coloff);
for (int j = 0; j < 4; j++)
c[j] = Convert4To8((cdata >> (j * 4)) & 0xF);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_Color8888() const
{
u8 *c = decoded_ + decFmt.c0off;
const u8 *cdata = (const u8*)(ptr_ + coloff);
memcpy(c, cdata, sizeof(u8) * 4);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_Color565Morph() const
@@ -262,6 +266,7 @@ void VertexDecoder::Step_Color565Morph() const
c[i] = (u8)col[i];
}
c[3] = 255;
// Always full alpha.
}

void VertexDecoder::Step_Color5551Morph() const
@@ -280,6 +285,7 @@ void VertexDecoder::Step_Color5551Morph() const
for (int i = 0; i < 4; i++) {
c[i] = (u8)col[i];
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_Color4444Morph() const
@@ -296,6 +302,7 @@ void VertexDecoder::Step_Color4444Morph() const
for (int i = 0; i < 4; i++) {
c[i] = (u8)col[i];
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_Color8888Morph() const
@@ -312,6 +319,7 @@ void VertexDecoder::Step_Color8888Morph() const
for (int i = 0; i < 4; i++) {
c[i] = (u8)(col[i]);
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}

void VertexDecoder::Step_NormalS8() const
@@ -841,6 +849,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowe
jitted_(ptr_, decoded_, count);
} else {
// Interpret the decode steps
// TODO: Init gstate_c.vertexFullAlpha here? Or in Setup? When is it reset?
for (; count; count--) {
for (int i = 0; i < numSteps_; i++) {
((*this).*steps_[i])();
@@ -266,6 +266,6 @@ class VertexDecoderJitCache : public Gen::XCodeBlock {
bool CompileStep(const VertexDecoder &dec, int i);
void Jit_ApplyWeights();
void Jit_WriteMatrixMul(int outOff, bool pos);
void Jit_WriteMorphColor(int outOff);
void Jit_WriteMorphColor(int outOff, bool checkAlpha = true);
const VertexDecoder *dec_;
};
@@ -61,7 +61,8 @@ static const ARMReg tempReg2 = R4;
static const ARMReg tempReg3 = R5;
static const ARMReg scratchReg = R6;
static const ARMReg scratchReg2 = R7;
// scratchReg3 must be defined exactly once. It lives in R8 because R12 is
// dedicated to fullAlphaReg, which carries the "all vertex alphas were 0xFF"
// state across the whole decode loop and so must not be used as scratch.
static const ARMReg scratchReg3 = R8;
static const ARMReg fullAlphaReg = R12;
static const ARMReg srcReg = R0;
static const ARMReg dstReg = R1;
static const ARMReg counterReg = R2;
@@ -262,6 +263,11 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
// TODO: Preload scale factors
}

if (dec.col) {
// Or LDB and skip the conditional? This is probably cheaper.
MOV(fullAlphaReg, 0xFF);
}

JumpTarget loopStart = GetCodePtr();
// Preload data cache ahead of reading. This offset seems pretty good.
PLD(srcReg, 64);
@@ -281,6 +287,14 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
SUBS(counterReg, counterReg, 1);
B_CC(CC_NEQ, loopStart);

if (dec.col) {
	// Publish the result of the per-vertex alpha checks. fullAlphaReg is loaded with 0xFF
	// before the loop and zeroed by any color step that sees alpha != 0xFF, so a store is
	// only needed when it ended up zero (the flag is never set back to true here).
	// The target must be the *vertex* full-alpha flag, the same one the interpreted
	// Step_Color* functions update; writing textureFullAlpha would corrupt the texture
	// state and leave the vertex flag stale.
	// NOTE(review): the byte store assumes gstate_c.vertexFullAlpha is one byte wide - confirm.
	MOVP2R(tempReg1, &gstate_c.vertexFullAlpha);
	CMP(fullAlphaReg, 0);
	SetCC(CC_EQ);
	STRB(fullAlphaReg, tempReg1, 0);
	SetCC(CC_AL);
}

if (NEONSkinning || NEONMorphing) {
VPOP(D8, 8);
}
@@ -664,7 +678,12 @@ void VertexDecoderJitCache::Jit_TcFloatPrescale() {

// Emits code that copies one 32-bit vertex color from the source vertex (at dec_->coloff)
// to the decoded vertex (at dec_->decFmt.c0off) unchanged, and zeroes fullAlphaReg when the
// top byte of that color (the alpha, per the comment below) is not 0xFF.
void VertexDecoderJitCache::Jit_Color8888() {
LDR(tempReg1, srcReg, dec_->coloff);
// Set flags to determine if alpha != 0xFF.
// The arithmetic shift right by 24 leaves the top byte sign-extended, which is all ones only
// when that byte is 0xFF; inverting it then yields zero (EQ) exactly for full alpha.
// tempReg2 only receives the throwaway result; the flags are what matter.
MVNS(tempReg2, Operand2(tempReg1, ST_ASR, 24));
// NOTE(review): this store sits between the flag-setting MVNS and the conditional MOV, so it
// presumably leaves the condition flags untouched - confirm before reordering.
STR(tempReg1, dstReg, dec_->decFmt.c0off);
// Make only the MOV conditional on "not equal" (alpha was not 0xFF), then switch back to
// unconditional emission for whatever is generated next.
SetCC(CC_NEQ);
MOV(fullAlphaReg, 0);
SetCC(CC_AL);
}

void VertexDecoderJitCache::Jit_Color4444() {
@@ -679,10 +698,16 @@ void VertexDecoderJitCache::Jit_Color4444() {
ANDI2R(tempReg3, tempReg1, 0xF000, scratchReg);
ORR(tempReg2, tempReg2, Operand2(tempReg3, ST_LSL, 12));

// And saturate.
// And expand to 8 bits.
ORR(tempReg1, tempReg2, Operand2(tempReg2, ST_LSL, 4));

STR(tempReg1, dstReg, dec_->decFmt.c0off);

// Set flags to determine if alpha != 0xFF.
MVNS(tempReg2, Operand2(tempReg1, ST_ASR, 24));
SetCC(CC_NEQ);
MOV(fullAlphaReg, 0);
SetCC(CC_AL);
}

void VertexDecoderJitCache::Jit_Color565() {
@@ -706,7 +731,7 @@ void VertexDecoderJitCache::Jit_Color565() {
ORR(tempReg3, tempReg3, Operand2(tempReg1, ST_LSR, 4));
ORR(tempReg2, tempReg2, Operand2(tempReg3, ST_LSL, 8));

// Add in full alpha.
// Add in full alpha. No need to update fullAlphaReg.
ORI2R(tempReg1, tempReg2, 0xFF000000, scratchReg);

STR(tempReg1, dstReg, dec_->decFmt.c0off);
@@ -731,8 +756,13 @@ void VertexDecoderJitCache::Jit_Color5551() {
// Now we just need alpha. Since we loaded as signed, it'll be extended.
ANDI2R(tempReg1, tempReg1, 0xFF000000, scratchReg);
ORR(tempReg2, tempReg2, tempReg1);


// Set flags to determine if alpha != 0xFF.
MVNS(tempReg3, Operand2(tempReg1, ST_ASR, 24));
STR(tempReg2, dstReg, dec_->decFmt.c0off);
SetCC(CC_NEQ);
MOV(fullAlphaReg, 0);
SetCC(CC_AL);
}

void VertexDecoderJitCache::Jit_Color8888Morph() {
@@ -957,7 +987,7 @@ void VertexDecoderJitCache::Jit_Color565Morph() {
} else {
VMOV(S11, tempReg3);
}
Jit_WriteMorphColor(dec_->decFmt.c0off);
Jit_WriteMorphColor(dec_->decFmt.c0off, false);
}

// First is the left shift, second is the right shift (against walls, to get the RGBA values.)
@@ -1045,13 +1075,16 @@ void VertexDecoderJitCache::Jit_Color5551Morph() {
}

// Expects RGBA color in S8 - S11, which is Q2.
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff, bool checkAlpha) {
if (NEONMorphing) {
ADDI2R(tempReg1, dstReg, outOff, scratchReg);
VCVT(I_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
VQMOVN(I_32 | I_UNSIGNED, neonScratchReg, neonScratchRegQ);
VQMOVN(I_16 | I_UNSIGNED, neonScratchReg, neonScratchRegQ);
VST1_lane(I_32, neonScratchReg, tempReg1, 0, true);
if (checkAlpha) {
VMOV_neon(I_32, scratchReg, neonScratchReg, 0);
}
} else {
VCVT(S8, S8, TO_INT);
VCVT(S9, S9, TO_INT);
@@ -1066,6 +1099,14 @@ void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
ORR(scratchReg, scratchReg, Operand2(tempReg3, ST_LSL, 24));
STR(scratchReg, dstReg, outOff);
}

// Set flags to determine if alpha != 0xFF.
if (checkAlpha) {
MVNS(tempReg2, Operand2(scratchReg, ST_ASR, 24));
SetCC(CC_NEQ);
MOV(fullAlphaReg, 0);
SetCC(CC_AL);
}
}

void VertexDecoderJitCache::Jit_NormalS8() {

0 comments on commit ff498ed

Please sign in to comment.
You can’t perform that action at this time.