Skip to content

Commit

Permalink
Avoid decoding indices when we don't need them.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Dec 20, 2023
1 parent d9c6d09 commit 61acce1
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 207 deletions.
32 changes: 24 additions & 8 deletions GPU/Common/DrawEngineCommon.cpp
Expand Up @@ -816,6 +816,10 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
_dbg_assert_(numDrawInds_ <= MAX_DEFERRED_DRAW_INDS); // if it's equal, the check below will take care of it before any action is taken.
_dbg_assert_(numDrawVerts_ > 0);

if (!clockwise) {
anyCCWOrIndexed_ = true;
}
int seenPrims = 0;
while (cmd != stall) {
uint32_t data = *cmd;
if ((data & 0xFFF80000) != 0x04000000) {
Expand All @@ -831,6 +835,7 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
DeferredInds &di = drawInds_[numDrawInds_++];
di.indexType = 0;
di.prim = newPrim;
seenPrims |= (1 << newPrim);
di.clockwise = clockwise;
di.vertexCount = vertexCount;
di.vertDecodeIndex = prevDrawVerts;
Expand All @@ -839,6 +844,10 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
cmd++;
}

seenPrims_ |= seenPrims;

_dbg_assert_(cmd != start);

int totalCount = offset - dv.vertexCount;
dv.vertexCount = offset;
dv.indexUpperBound = dv.vertexCount - 1;
Expand Down Expand Up @@ -910,9 +919,16 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti

DeferredInds &di = drawInds_[numDrawInds_++];
di.inds = inds;
di.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
int indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
if (indexType) {
anyCCWOrIndexed_ = true;
}
di.indexType = indexType;
di.prim = prim;
di.clockwise = clockwise;
if (!clockwise) {
anyCCWOrIndexed_ = true;
}
di.vertexCount = vertexCount;
di.vertDecodeIndex = numDrawVerts_;
di.offset = 0;
Expand Down Expand Up @@ -942,6 +958,7 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
}

vertexCountInDrawCalls_ += vertexCount;
seenPrims_ |= (1 << prim);

if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
// This prevents issues with consecutive self-renders in Ridge Racer.
Expand All @@ -952,6 +969,8 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
}

void DrawEngineCommon::DecodeVerts(u8 *dest) {
// Note that this should be able to continue a partial decode - we don't necessarily start from zero here (although we do most of the time).

int i = decodeVertsCounter_;
int stride = (int)dec_->GetDecVtxFmt().stride;
for (; i < numDrawVerts_; i++) {
Expand All @@ -968,7 +987,9 @@ void DrawEngineCommon::DecodeVerts(u8 *dest) {
decodeVertsCounter_ = i;
}

void DrawEngineCommon::DecodeInds() {
int DrawEngineCommon::DecodeInds() {
// Note that this should be able to continue a partial decode - we don't necessarily start from zero here (although we do most of the time).

int i = decodeIndsCounter_;
for (; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
Expand All @@ -994,12 +1015,7 @@ void DrawEngineCommon::DecodeInds() {
}
decodeIndsCounter_ = i;

// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
// Force to points (0)
indexGen.AddPrim(GE_PRIM_POINTS, 0, 0, true);
}
return indexGen.VertexCount();
}

bool DrawEngineCommon::CanUseHardwareTransform(int prim) {
Expand Down
44 changes: 39 additions & 5 deletions GPU/Common/DrawEngineCommon.h
Expand Up @@ -152,7 +152,7 @@ class DrawEngineCommon {
void UpdatePlanes();

void DecodeVerts(u8 *dest);
void DecodeInds();
int DecodeInds();

// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
Expand Down Expand Up @@ -202,6 +202,8 @@ class DrawEngineCommon {
vertexCountInDrawCalls_ = 0;
decodeIndsCounter_ = 0;
decodeVertsCounter_ = 0;
seenPrims_ = 0;
anyCCWOrIndexed_ = false;
gstate_c.vertexFullAlpha = true;

// Now seems as good a time as any to reset the min/max coords, which we may examine later.
Expand All @@ -211,6 +213,34 @@ class DrawEngineCommon {
gstate_c.vertBounds.maxV = 0;
}

// Reports whether the draws collected so far form a "pure" draw:
// seenPrims_ must have exactly one bit set, for points, lines, triangles or
// triangle strips, and no collected draw may be counter-clockwise or indexed.
// A triangle strip additionally only qualifies when exactly one draw was collected.
inline bool CollectedPureDraw() const {
	// A counter-clockwise or indexed draw disqualifies every case below.
	if (anyCCWOrIndexed_) {
		return false;
	}
	if (seenPrims_ == (1 << GE_PRIM_TRIANGLE_STRIP)) {
		return numDrawInds_ == 1;
	}
	return seenPrims_ == (1 << GE_PRIM_POINTS) ||
		seenPrims_ == (1 << GE_PRIM_LINES) ||
		seenPrims_ == (1 << GE_PRIM_TRIANGLES);
}

// Fills in the parameters describing how to draw what has been collected.
//   prim         - receives the primitive type to draw.
//   numVerts     - receives the vertex count to draw.
//   maxIndex     - receives numDecodedVerts_.
//   useElements  - receives true if DecodeInds() was run, false otherwise.
//   forceIndexed - when true, DecodeInds() is always run, even for a pure draw.
inline void DecodeIndsAndGetData(GEPrimitiveType *prim, int *numVerts, int *maxIndex, bool *useElements, bool forceIndexed) {
	const bool skipIndexDecode = !forceIndexed && CollectedPureDraw();
	if (skipIndexDecode) {
		// Pure draw: report the first draw's primitive and the decoded vertex count as-is.
		*prim = drawInds_[0].prim;
		*numVerts = numDecodedVerts_;
		*maxIndex = numDecodedVerts_;
		*useElements = false;
		return;
	}

	// Otherwise run the index decode; its return value is the count to draw.
	*numVerts = DecodeInds();
	*maxIndex = numDecodedVerts_;
	*prim = IndexGenerator::GeneralPrim(static_cast<GEPrimitiveType>(drawInds_[0].prim));
	*useElements = true;
}

uint32_t ComputeDrawcallsHash() const;

bool useHWTransform_ = false;
Expand All @@ -227,9 +257,7 @@ class DrawEngineCommon {
u16 *decIndex_ = nullptr;

// Cached vertex decoders
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
DenseHashMap<u32, VertexDecoder *> decoderMap_;
VertexDecoder *dec_ = nullptr;
VertexDecoderJitCache *decJitCache_ = nullptr;
VertexDecoderOptions decOptions_{};

Expand All @@ -239,18 +267,18 @@ class DrawEngineCommon {
// Defer all vertex decoding to a "Flush" (except when software skinning)
struct DeferredVerts {
const void *verts;
UVScale uvScale;
u32 vertexCount;
u16 indexLowerBound;
u16 indexUpperBound;
UVScale uvScale;
};

struct DeferredInds {
const void *inds;
u32 vertexCount;
u8 vertDecodeIndex; // index into the drawVerts_ array to look up the vertexOffset.
u8 indexType;
s8 prim;
GEPrimitiveType prim;
bool clockwise;
u16 offset;
};
Expand All @@ -261,13 +289,19 @@ class DrawEngineCommon {
uint32_t drawVertexOffsets_[MAX_DEFERRED_DRAW_VERTS];
DeferredInds drawInds_[MAX_DEFERRED_DRAW_INDS];

VertexDecoder *dec_ = nullptr;
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
// Number of entries currently used in drawVerts_ (upper bound of the DecodeVerts loop).
int numDrawVerts_ = 0;
// Number of entries currently used in drawInds_; incremented for each deferred draw.
int numDrawInds_ = 0;
// Running sum of the vertexCount of every submitted prim since the last reset.
int vertexCountInDrawCalls_ = 0;

// Resume positions for DecodeVerts() / DecodeInds(): each stores the index it
// stopped at, so a later call can continue a partial decode from there.
int decodeVertsCounter_ = 0;
int decodeIndsCounter_ = 0;

// Bitmask of the primitive types collected since the last reset: bit (1 << prim)
// is set for every prim submitted or extended.
int seenPrims_ = 0;
// Set to true as soon as any collected draw is counter-clockwise or uses an index type.
bool anyCCWOrIndexed_ = false;
// NOTE(review): not referenced in the code visible here - confirm it is still needed.
bool anyIndexed_ = false;

// Vertex collector state
IndexGenerator indexGen;
int numDecodedVerts_ = 0;
Expand Down

0 comments on commit 61acce1

Please sign in to comment.