From 61acce195cf04eebc1194ff31f97fb36d1ada84a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 6 Oct 2023 12:43:10 +0200 Subject: [PATCH] Avoid decoding indices when we don't need them. --- GPU/Common/DrawEngineCommon.cpp | 32 +++++++--- GPU/Common/DrawEngineCommon.h | 44 ++++++++++++-- GPU/Common/IndexGenerator.cpp | 103 +++++++++----------------------- GPU/Common/IndexGenerator.h | 50 ++++------------ GPU/D3D11/DrawEngineD3D11.cpp | 28 ++++----- GPU/Directx9/DrawEngineDX9.cpp | 27 ++++----- GPU/GLES/DrawEngineGLES.cpp | 41 ++++++------- GPU/Vulkan/DrawEngineVulkan.cpp | 30 +++------- GPU/ge_constants.h | 3 +- 9 files changed, 151 insertions(+), 207 deletions(-) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index ceba9345e9b4..7a5411c6833c 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -816,6 +816,10 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t * _dbg_assert_(numDrawInds_ <= MAX_DEFERRED_DRAW_INDS); // if it's equal, the check below will take care of it before any action is taken. _dbg_assert_(numDrawVerts_ > 0); + if (!clockwise) { + anyCCWOrIndexed_ = true; + } + int seenPrims = 0; while (cmd != stall) { uint32_t data = *cmd; if ((data & 0xFFF80000) != 0x04000000) { @@ -831,6 +835,7 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t * DeferredInds &di = drawInds_[numDrawInds_++]; di.indexType = 0; di.prim = newPrim; + seenPrims |= (1 << newPrim); di.clockwise = clockwise; di.vertexCount = vertexCount; di.vertDecodeIndex = prevDrawVerts; @@ -839,6 +844,10 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t * cmd++; } + seenPrims_ |= seenPrims; + + _dbg_assert_(cmd != start); + int totalCount = offset - dv.vertexCount; dv.vertexCount = offset; dv.indexUpperBound = dv.vertexCount - 1; @@ -910,9 +919,16 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti DeferredInds &di = drawInds_[numDrawInds_++]; di.inds = inds; - di.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; + int indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; + if (indexType) { + anyCCWOrIndexed_ = true; + } + di.indexType = indexType; di.prim = prim; di.clockwise = clockwise; + if (!clockwise) { + anyCCWOrIndexed_ = true; + } di.vertexCount = vertexCount; di.vertDecodeIndex = numDrawVerts_; di.offset = 0; @@ -942,6 +958,7 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti } vertexCountInDrawCalls_ += vertexCount; + seenPrims_ |= (1 << prim); if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) { // This prevents issues with consecutive self-renders in Ridge Racer. @@ -952,6 +969,8 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti } void DrawEngineCommon::DecodeVerts(u8 *dest) { + // Note that this should be able to continue a partial decode - we don't necessarily start from zero here (although we do most of the time). + int i = decodeVertsCounter_; int stride = (int)dec_->GetDecVtxFmt().stride; for (; i < numDrawVerts_; i++) { @@ -968,7 +987,9 @@ void DrawEngineCommon::DecodeVerts(u8 *dest) { decodeVertsCounter_ = i; } -void DrawEngineCommon::DecodeInds() { +int DrawEngineCommon::DecodeInds() { + // Note that this should be able to continue a partial decode - we don't necessarily start from zero here (although we do most of the time). + int i = decodeIndsCounter_; for (; i < numDrawInds_; i++) { const DeferredInds &di = drawInds_[i]; @@ -994,12 +1015,7 @@ void DrawEngineCommon::DecodeInds() { } decodeIndsCounter_ = i; - // Sanity check - if (indexGen.Prim() < 0) { - ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim()); - // Force to points (0) - indexGen.AddPrim(GE_PRIM_POINTS, 0, 0, true); - } + return indexGen.VertexCount(); } bool DrawEngineCommon::CanUseHardwareTransform(int prim) { diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index 09261e5b59a5..437268c0a531 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -152,7 +152,7 @@ class DrawEngineCommon { void UpdatePlanes(); void DecodeVerts(u8 *dest); - void DecodeInds(); + int DecodeInds(); // Preprocessing for spline/bezier u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr); @@ -202,6 +202,8 @@ class DrawEngineCommon { vertexCountInDrawCalls_ = 0; decodeIndsCounter_ = 0; decodeVertsCounter_ = 0; + seenPrims_ = 0; + anyCCWOrIndexed_ = false; gstate_c.vertexFullAlpha = true; // Now seems as good a time as any to reset the min/max coords, which we may examine later. @@ -211,6 +213,34 @@ class DrawEngineCommon { gstate_c.vertBounds.maxV = 0; } + inline bool CollectedPureDraw() const { + switch (seenPrims_) { + case 1 << GE_PRIM_TRIANGLE_STRIP: + return !anyCCWOrIndexed_ && numDrawInds_ == 1; + case 1 << GE_PRIM_LINES: + case 1 << GE_PRIM_POINTS: + case 1 << GE_PRIM_TRIANGLES: + return !anyCCWOrIndexed_; + default: + return false; + } + } + + inline void DecodeIndsAndGetData(GEPrimitiveType *prim, int *numVerts, int *maxIndex, bool *useElements, bool forceIndexed) { + if (!forceIndexed && CollectedPureDraw()) { + *prim = drawInds_[0].prim; + *numVerts = numDecodedVerts_; + *maxIndex = numDecodedVerts_; + *useElements = false; + } else { + int vertexCount = DecodeInds(); + *numVerts = vertexCount; + *maxIndex = numDecodedVerts_; + *prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim); + *useElements = true; + } + } + uint32_t ComputeDrawcallsHash() const; bool useHWTransform_ = false; @@ -227,9 +257,7 @@ class DrawEngineCommon { u16 *decIndex_ = nullptr; // Cached vertex decoders - u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_... DenseHashMap decoderMap_; - VertexDecoder *dec_ = nullptr; VertexDecoderJitCache *decJitCache_ = nullptr; VertexDecoderOptions decOptions_{}; @@ -239,10 +267,10 @@ class DrawEngineCommon { // Defer all vertex decoding to a "Flush" (except when software skinning) struct DeferredVerts { const void *verts; + UVScale uvScale; u32 vertexCount; u16 indexLowerBound; u16 indexUpperBound; - UVScale uvScale; }; struct DeferredInds { @@ -250,7 +278,7 @@ class DrawEngineCommon { u32 vertexCount; u8 vertDecodeIndex; // index into the drawVerts_ array to look up the vertexOffset. u8 indexType; - s8 prim; + GEPrimitiveType prim; bool clockwise; u16 offset; }; @@ -261,6 +289,8 @@ class DrawEngineCommon { uint32_t drawVertexOffsets_[MAX_DEFERRED_DRAW_VERTS]; DeferredInds drawInds_[MAX_DEFERRED_DRAW_INDS]; + VertexDecoder *dec_ = nullptr; + u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_... int numDrawVerts_ = 0; int numDrawInds_ = 0; int vertexCountInDrawCalls_ = 0; @@ -268,6 +298,10 @@ class DrawEngineCommon { int decodeVertsCounter_ = 0; int decodeIndsCounter_ = 0; + int seenPrims_ = 0; + bool anyCCWOrIndexed_ = 0; + bool anyIndexed_ = 0; + // Vertex collector state IndexGenerator indexGen; int numDecodedVerts_ = 0; diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 45a23d991a32..1a0a91885874 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -67,9 +67,6 @@ void IndexGenerator::AddPoints(int numVerts, int indexOffset) { for (int i = 0; i < numVerts; i++) *outInds++ = indexOffset + i; inds_ = outInds; - // ignore overflow verts - prim_ = GE_PRIM_POINTS; - seenPrims_ |= 1 << GE_PRIM_POINTS; } void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) { @@ -82,13 +79,6 @@ void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) { *outInds++ = indexOffset + i + v2; } inds_ = outInds; - // ignore overflow verts - prim_ = GE_PRIM_TRIANGLES; - seenPrims_ |= 1 << GE_PRIM_TRIANGLES; - if (!clockwise) { - // Make sure we don't treat this as pure. - seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP; - } } alignas(16) static const u16 offsets_clockwise[24] = { @@ -203,17 +193,6 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) { } inds_ = outInds; #endif - - // This is so we can detect one single strip by just looking at seenPrims_. - if (!seenPrims_ && clockwise) { - seenPrims_ = 1 << GE_PRIM_TRIANGLE_STRIP; - prim_ = GE_PRIM_TRIANGLE_STRIP; - pureCount_ = numVerts; - } else { - seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | (1 << GE_PRIM_TRIANGLES); - prim_ = GE_PRIM_TRIANGLES; - pureCount_ = 0; - } } void IndexGenerator::AddFan(int numVerts, int indexOffset, bool clockwise) { @@ -227,12 +206,6 @@ void IndexGenerator::AddFan(int numVerts, int indexOffset, bool clockwise) { *outInds++ = indexOffset + i + v2; } inds_ = outInds; - prim_ = GE_PRIM_TRIANGLES; - seenPrims_ |= 1 << GE_PRIM_TRIANGLE_FAN; - if (!clockwise) { - // Make sure we don't treat this as pure. - seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP; - } } //Lines @@ -243,8 +216,6 @@ void IndexGenerator::AddLineList(int numVerts, int indexOffset) { *outInds++ = indexOffset + i + 1; } inds_ = outInds; - prim_ = GE_PRIM_LINES; - seenPrims_ |= 1 << prim_; } void IndexGenerator::AddLineStrip(int numVerts, int indexOffset) { @@ -255,8 +226,6 @@ void IndexGenerator::AddLineStrip(int numVerts, int indexOffset) { *outInds++ = indexOffset + i + 1; } inds_ = outInds; - prim_ = GE_PRIM_LINES; - seenPrims_ |= 1 << GE_PRIM_LINE_STRIP; } void IndexGenerator::AddRectangles(int numVerts, int indexOffset) { @@ -268,21 +237,17 @@ void IndexGenerator::AddRectangles(int numVerts, int indexOffset) { *outInds++ = indexOffset + i + 1; } inds_ = outInds; - prim_ = GE_PRIM_RECTANGLES; - seenPrims_ |= 1 << GE_PRIM_RECTANGLES; } -template +template void IndexGenerator::TranslatePoints(int numInds, const ITypeLE *inds, int indexOffset) { u16 *outInds = inds_; for (int i = 0; i < numInds; i++) *outInds++ = indexOffset + inds[i]; inds_ = outInds; - prim_ = GE_PRIM_POINTS; - seenPrims_ |= (1 << GE_PRIM_POINTS) | flag; } -template +template void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int indexOffset) { u16 *outInds = inds_; numInds = numInds & ~1; @@ -291,11 +256,9 @@ void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int ind *outInds++ = indexOffset + inds[i + 1]; } inds_ = outInds; - prim_ = GE_PRIM_LINES; - seenPrims_ |= (1 << GE_PRIM_LINES) | flag; } -template +template void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int indexOffset) { int numLines = numInds - 1; u16 *outInds = inds_; @@ -304,11 +267,9 @@ void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int in *outInds++ = indexOffset + inds[i + 1]; } inds_ = outInds; - prim_ = GE_PRIM_LINES; - seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | flag; } -template +template void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { // We only bother doing this minor optimization in triangle list, since it's by far the most // common operation that can benefit. @@ -329,11 +290,9 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf } inds_ = outInds; } - prim_ = GE_PRIM_TRIANGLES; - seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | flag; } -template +template void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { int wind = clockwise ? 1 : 2; int numTris = numInds - 2; @@ -345,11 +304,9 @@ void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexO *outInds++ = indexOffset + inds[i + wind]; } inds_ = outInds; - prim_ = GE_PRIM_TRIANGLES; - seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | flag; } -template +template void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { if (numInds <= 0) return; int numTris = numInds - 2; @@ -362,11 +319,9 @@ void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOff *outInds++ = indexOffset + inds[i + v2]; } inds_ = outInds; - prim_ = GE_PRIM_TRIANGLES; - seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | flag; } -template +template inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds, int indexOffset) { u16 *outInds = inds_; //rectangles always need 2 vertices, disregard the last one if there's an odd number @@ -376,43 +331,41 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds *outInds++ = indexOffset + inds[i+1]; } inds_ = outInds; - prim_ = GE_PRIM_RECTANGLES; - seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | flag; } // Could template this too, but would have to define in header. void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) { switch (prim) { - case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; - case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; - case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same + case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; + case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; + case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) { switch (prim) { - case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; - case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; - case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same + case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; + case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; + case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) { switch (prim) { - case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; - case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; - case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same + case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; + case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; + case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } diff --git a/GPU/Common/IndexGenerator.h b/GPU/Common/IndexGenerator.h index 77a49f39eb35..9e77c2591f1a 100644 --- a/GPU/Common/IndexGenerator.h +++ b/GPU/Common/IndexGenerator.h @@ -26,32 +26,22 @@ class IndexGenerator { public: void Setup(u16 *indexptr); void Reset() { - prim_ = GE_PRIM_INVALID; - seenPrims_ = 0; - pureCount_ = 0; this->inds_ = indsBase_; } - bool PrimCompatible(int prim1, int prim2) { + static bool PrimCompatible(int prim1, int prim2) { if (prim1 == GE_PRIM_INVALID || prim2 == GE_PRIM_KEEP_PREVIOUS) return true; return indexedPrimitiveType[prim1] == indexedPrimitiveType[prim2]; } - bool PrimCompatible(int prim) const { - if (prim_ == GE_PRIM_INVALID || prim == GE_PRIM_KEEP_PREVIOUS) - return true; - return indexedPrimitiveType[prim] == prim_; - } - - GEPrimitiveType Prim() const { return prim_; } - GEPrimitiveType GeneralPrim() const { - switch (prim_) { + static GEPrimitiveType GeneralPrim(GEPrimitiveType prim) { + switch (prim) { case GE_PRIM_LINE_STRIP: return GE_PRIM_LINES; break; case GE_PRIM_TRIANGLE_STRIP: case GE_PRIM_TRIANGLE_FAN: return GE_PRIM_TRIANGLES; break; default: - return prim_; + return prim; } } @@ -60,15 +50,8 @@ class IndexGenerator { void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise); void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise); + // This is really the number of generated indices, or 3x the number of triangles. int VertexCount() const { return inds_ - indsBase_; } - int SeenPrims() const { return seenPrims_; } - int PureCount() const { return pureCount_; } - bool SeenOnlyPurePrims() const { - return seenPrims_ == (1 << GE_PRIM_TRIANGLES) || - seenPrims_ == (1 << GE_PRIM_LINES) || - seenPrims_ == (1 << GE_PRIM_POINTS) || - seenPrims_ == (1 << GE_PRIM_TRIANGLE_STRIP); - } private: // Points (why index these? code simplicity) @@ -84,34 +67,25 @@ class IndexGenerator { void AddRectangles(int numVerts, int indexOffset); // These translate already indexed lists - template + template void TranslatePoints(int numVerts, const ITypeLE *inds, int indexOffset); - template + template void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); - template + template inline void TranslateLineList(int numVerts, const ITypeLE *inds, int indexOffset); - template + template inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset); - template + template void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); - template + template void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); - template + template inline void TranslateRectangles(int numVerts, const ITypeLE *inds, int indexOffset); - enum { - SEEN_INDEX8 = 1 << 16, - SEEN_INDEX16 = 1 << 17, - SEEN_INDEX32 = 1 << 18, - }; - u16 *indsBase_; u16 *inds_; - int pureCount_; - GEPrimitiveType prim_; - int seenPrims_; static const u8 indexedPrimitiveType[7]; }; diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 5bd6f5991895..d2c6b4825d79 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -286,16 +286,12 @@ void DrawEngineD3D11::DoFlush() { ID3D11Buffer *vb_ = nullptr; ID3D11Buffer *ib_ = nullptr; + int vertexCount; + int maxIndex; + bool useElements; DecodeVerts(decoded_); - DecodeInds(); - - bool useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN; - int vertexCount = indexGen.VertexCount(); + DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false); gpuStats.numUncachedVertsDrawn += vertexCount; - if (!useElements && indexGen.PureCount()) { - vertexCount = indexGen.PureCount(); - } - prim = indexGen.Prim(); bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { @@ -336,7 +332,7 @@ void DrawEngineD3D11::DoFlush() { context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset); if (useElements) { UINT iOffset; - int iSize = 2 * indexGen.VertexCount(); + int iSize = 2 * vertexCount; uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize); memcpy(iptr, decIndex_, iSize); pushInds_->EndPush(context_); @@ -363,7 +359,8 @@ void DrawEngineD3D11::DoFlush() { dec_ = GetVertexDecoder(lastVType_); } DecodeVerts(decoded_); - DecodeInds(); + int vertexCount = DecodeInds(); + bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255); @@ -371,12 +368,9 @@ void DrawEngineD3D11::DoFlush() { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255); } - gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); - prim = indexGen.Prim(); - // Undo the strip optimization, not supported by the SW code yet. - if (prim == GE_PRIM_TRIANGLE_STRIP) - prim = GE_PRIM_TRIANGLES; - VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount()); + gpuStats.numUncachedVertsDrawn += vertexCount; + prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim); + VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, vertexCount); u16 *inds = decIndex_; SoftwareTransformResult result{}; @@ -424,7 +418,7 @@ void DrawEngineD3D11::DoFlush() { ApplyDrawState(prim); if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, numDecodedVerts_, &result); + swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, numDecodedVerts_, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index fd248da2d6d2..c3214f548cf8 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -258,16 +258,12 @@ void DrawEngineDX9::DoFlush() { LPDIRECT3DVERTEXBUFFER9 vb_ = nullptr; LPDIRECT3DINDEXBUFFER9 ib_ = nullptr; + int vertexCount; + int maxIndex; + bool useElements; DecodeVerts(decoded_); - DecodeInds(); - - bool useElements = !indexGen.SeenOnlyPurePrims(); - int vertexCount = indexGen.VertexCount(); + DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false); gpuStats.numUncachedVertsDrawn += vertexCount; - if (!useElements && indexGen.PureCount()) { - vertexCount = indexGen.PureCount(); - } - prim = indexGen.Prim(); _dbg_assert_((int)prim > 0); @@ -315,7 +311,8 @@ void DrawEngineDX9::DoFlush() { dec_ = GetVertexDecoder(lastVType_); } DecodeVerts(decoded_); - DecodeInds(); + int vertexCount = DecodeInds(); + bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255); @@ -323,12 +320,9 @@ void DrawEngineDX9::DoFlush() { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255); } - gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); - prim = indexGen.Prim(); - // Undo the strip optimization, not supported by the SW code yet. - if (prim == GE_PRIM_TRIANGLE_STRIP) - prim = GE_PRIM_TRIANGLES; - VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount()); + gpuStats.numUncachedVertsDrawn += vertexCount; + prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim); + VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, vertexCount); u16 *inds = decIndex_; SoftwareTransformResult result{}; @@ -354,6 +348,7 @@ void DrawEngineDX9::DoFlush() { UpdateCachedViewportState(vpAndScissor); } + int maxIndex = numDecodedVerts_; SoftwareTransform swTransform(params); // Half pixel offset hack. @@ -379,7 +374,7 @@ void DrawEngineDX9::DoFlush() { ApplyDrawState(prim); if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, numDecodedVerts_, &result); + swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, numDecodedVerts_, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 9ba25ada163e..d96f8f7574fa 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -284,19 +284,20 @@ void DrawEngineGLES::DoFlush() { u8 *dest = (u8 *)frameData.pushVertex->Allocate(vertsToDecode * dec_->GetDecVtxFmt().stride, 4, &vertexBuffer, &vertexBufferOffset); DecodeVerts(dest); } - DecodeInds(); - - // If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS, - // there is no need for the index buffer we built. We can then use glDrawArrays instead - // for a very minor speed boost. TODO: We can probably detect this case earlier, like before - // actually doing any vertex decoding (unless we're doing soft skinning and pre-decode on submit). - bool useElements = !indexGen.SeenOnlyPurePrims(); - int vertexCount = indexGen.VertexCount(); - gpuStats.numUncachedVertsDrawn += vertexCount; - if (!useElements && indexGen.PureCount()) { - vertexCount = indexGen.PureCount(); + + int vertexCount; + int maxIndex; + bool useElements; + DecodeVerts(decoded_); + DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false); + + if (useElements) { + uint32_t esz = sizeof(uint16_t) * vertexCount; + void *dest = frameData.pushIndex->Allocate(esz, 2, &indexBuffer, &indexBufferOffset); + // TODO: When we need to apply an index offset, we can apply it directly when copying the indices here. + // Of course, minding the maximum value of 65535... + memcpy(dest, decIndex_, esz); } - prim = indexGen.Prim(); bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { @@ -316,11 +317,6 @@ void DrawEngineGLES::DoFlush() { LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, pipelineState_, framebufferManager_->UseBufferedRendering()); GLRInputLayout *inputLayout = SetupDecFmtForDraw(dec_->GetDecVtxFmt()); if (useElements) { - uint32_t esz = sizeof(uint16_t) * indexGen.VertexCount(); - void *dest = frameData.pushIndex->Allocate(esz, 2, &indexBuffer, &indexBufferOffset); - // TODO: When we need to apply an index offset, we can apply it directly when copying the indices here. - // Of course, minding the maximum value of 65535... - memcpy(dest, decIndex_, esz); render_->DrawIndexed(inputLayout, vertexBuffer, vertexBufferOffset, indexBuffer, indexBufferOffset, @@ -338,7 +334,7 @@ void DrawEngineGLES::DoFlush() { dec_ = GetVertexDecoder(lastVType_); } DecodeVerts(decoded_); - DecodeInds(); + int vertexCount = DecodeInds(); bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { @@ -347,11 +343,8 @@ void DrawEngineGLES::DoFlush() { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255); } - gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); - prim = indexGen.Prim(); - // Undo the strip optimization, not supported by the SW code yet. - if (prim == GE_PRIM_TRIANGLE_STRIP) - prim = GE_PRIM_TRIANGLES; + gpuStats.numUncachedVertsDrawn += vertexCount; + prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim); u16 *inds = decIndex_; SoftwareTransformResult result{}; @@ -377,7 +370,7 @@ void DrawEngineGLES::DoFlush() { UpdateCachedViewportState(vpAndScissor_); } - int vertexCount = indexGen.VertexCount(); + int maxIndex = numDecodedVerts_; // TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts. if (gl_extensions.IsGLES && !gl_extensions.GLES3) { diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 3b1ff9ad4bde..95f4f99b6a1c 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -258,21 +258,11 @@ void DrawEngineVulkan::DoFlush() { u8 *dest = pushVertex_->Allocate(vertsToDecode * dec_->GetDecVtxFmt().stride, 4, &vbuf, &vbOffset); DecodeVerts(dest); } - DecodeInds(); + int vertexCount; + int maxIndex; bool useElements; - int vertexCount = indexGen.VertexCount(); - gpuStats.numUncachedVertsDrawn += vertexCount; - if (forceIndexed) { - useElements = true; - prim = indexGen.GeneralPrim(); - } else { - useElements = !indexGen.SeenOnlyPurePrims(); - if (!useElements && indexGen.PureCount()) { - vertexCount = indexGen.PureCount(); - } - prim = indexGen.Prim(); - } + DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false); bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { @@ -363,7 +353,7 @@ void DrawEngineVulkan::DoFlush() { }; if (useElements) { if (!ibuf) { - ibOffset = (uint32_t)pushIndex_->Push(decIndex_, sizeof(uint16_t) * indexGen.VertexCount(), 4, &ibuf); + ibOffset = (uint32_t)pushIndex_->Push(decIndex_, sizeof(uint16_t) * vertexCount, 4, &ibuf); } renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1); } else { @@ -379,7 +369,7 @@ void DrawEngineVulkan::DoFlush() { int prevDecodedVerts = numDecodedVerts_; DecodeVerts(decoded_); - DecodeInds(); + int vertexCount = DecodeInds(); bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; if (gstate.isModeThrough()) { @@ -388,12 +378,8 @@ void DrawEngineVulkan::DoFlush() { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255); } - gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); - prim = indexGen.Prim(); - // Undo the strip optimization, not supported by the SW code yet. - if (prim == GE_PRIM_TRIANGLE_STRIP) - prim = GE_PRIM_TRIANGLES; - _dbg_assert_(prim != GE_PRIM_INVALID); + gpuStats.numUncachedVertsDrawn += vertexCount; + prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim); u16 *inds = decIndex_; SoftwareTransformResult result{}; @@ -436,7 +422,7 @@ void DrawEngineVulkan::DoFlush() { result.action = SW_NOT_READY; if (result.action == SW_NOT_READY) { swTransform.DetectOffsetTexture(numDecodedVerts_); - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, numDecodedVerts_, &result); + swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, numDecodedVerts_, &result); } if (result.setSafeSize) diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index e425dbeef791..80db78dfac70 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -583,8 +583,7 @@ enum GETexProjMapMode GE_PROJMAP_NORMAL = 3, }; -enum GEPrimitiveType -{ +enum GEPrimitiveType : int8_t { GE_PRIM_POINTS = 0, GE_PRIM_LINES = 1, GE_PRIM_LINE_STRIP = 2,