diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
index dfe127fa3e52..76be8eb225a1 100644
--- a/GPU/Common/DrawEngineCommon.cpp
+++ b/GPU/Common/DrawEngineCommon.cpp
@@ -678,6 +678,51 @@ uint64_t DrawEngineCommon::ComputeHash() {
 
 	return fullhash;
 }
+// Tries to append a non-indexed PRIM to the most recently queued vertex range instead of
+// queuing a new one. The caller is responsible for only calling this when the new vertices
+// directly follow the previously submitted ones in memory and use the same vertex type.
+//
+// On success, writes the number of vertex bytes consumed to *bytesRead and returns true.
+// Returns false without modifying any state if the draw can't be merged; the caller should
+// then fall back to SubmitPrim().
+bool DrawEngineCommon::ExtendNonIndexedPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
+	// There has to be a previous vertex range to extend. SubmitPrim() can call DispatchFlush()
+	// right before returning, which presumably leaves nothing queued - in that case indexing
+	// drawVerts_[numDrawVerts_ - 1] below would be out of bounds.
+	if (numDrawVerts_ <= 0) {
+		return false;
+	}
+
+	if (numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
+		return false;
+	}
+
+	const bool applySkin = (vertTypeID & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode;
+	if (applySkin) {
+		// TODO: Support this somehow.
+		return false;
+	}
+
+	*bytesRead = vertexCount * dec_->VertexSize();
+
+	DeferredVerts &dv = drawVerts_[numDrawVerts_ - 1];
+
+	// The new draw refers to the same vertex range, starting right after the vertices already in it.
+	DeferredInds &di = drawInds_[numDrawInds_++];
+	di.inds = nullptr;
+	di.indexType = 0;
+	di.prim = prim;
+	di.cullMode = cullMode;
+	di.vertexCount = vertexCount;
+	di.vertDecodeIndex = numDrawVerts_ - 1;
+	di.offset = dv.vertexCount;
+
+	dv.vertexCount += vertexCount;
+	dv.indexUpperBound = dv.vertexCount - 1;
+	vertexCountInDrawCalls_ += vertexCount;
+	return true;
+}
+
 // vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) { if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_VERTS || numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) { @@ -750,15 +782,15 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti vertexCountInDrawCalls_ += vertexCount; - if (applySkin) { - DecodeVerts(decoded_); - } - if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) { // This prevents issues with consecutive self-renders in Ridge Racer. gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); DispatchFlush(); } + + if (applySkin) { + DecodeVerts(decoded_); + } } void DrawEngineCommon::DecodeVerts(u8 *dest) { diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index f53988700f3b..a6461c9cfb12 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -104,6 +104,7 @@ class DrawEngineCommon { bool TestBoundingBox(const void *control_points, const void *inds, int vertexCount, u32 vertType); + bool ExtendNonIndexedPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead); void SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead); template void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope); diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index 544f1c58cc66..5bf7e9873c6e 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -967,6 +967,8 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); const void *inds = nullptr; + + bool canExtend = true; u32 vertexType = 
gstate.vertType; if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { u32 indexAddr = gstate_c.indexAddr; @@ -975,6 +977,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { return; } inds = Memory::GetPointerUnchecked(indexAddr); + canExtend = false; } int bytesRead = 0; @@ -1017,12 +1020,25 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { if (IsTrianglePrim(newPrim) != isTriangle) goto bail; // Can't join over this boundary. Might as well exit and get this on the next time around. // TODO: more efficient updating of verts/inds + + u32 count = data & 0xFFFF; + if (canExtend) { + // Non-indexed draws can be cheaply merged if vertexAddr hasn't changed, that means the vertices + // are consecutive in memory. + _dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE); + if (drawEngineCommon_->ExtendNonIndexedPrim(newPrim, count, vertTypeID, cullMode, &bytesRead)) { + gstate_c.vertexAddr += bytesRead; + totalVertCount += count; + break; + } + } + + // Failed, or can't extend? Do a normal submit. verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); inds = nullptr; if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { inds = Memory::GetPointerUnchecked(gstate_c.indexAddr); } - u32 count = data & 0xFFFF; drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead); AdvanceVerts(vertexType, count, bytesRead); totalVertCount += count; @@ -1030,6 +1046,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { } case GE_CMD_VERTEXTYPE: { + canExtend = false; // TODO: Might support extending between some vertex types in the future. 
uint32_t diff = data ^ vertexType; // don't mask upper bits, vertexType is unmasked if (diff & vtypeCheckMask) { @@ -1043,6 +1060,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { case GE_CMD_VADDR: gstate.cmdmem[GE_CMD_VADDR] = data; gstate_c.vertexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF); + canExtend = false; break; case GE_CMD_IADDR: gstate.cmdmem[GE_CMD_IADDR] = data; @@ -1051,6 +1069,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { case GE_CMD_OFFSETADDR: gstate.cmdmem[GE_CMD_OFFSETADDR] = data; gstate_c.offsetAddr = data << 8; + canExtend = false; break; case GE_CMD_BASE: gstate.cmdmem[GE_CMD_BASE] = data;