Skip to content

Commit

Permalink
Implement fast-path for merging non-indexed draws quickly.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Oct 3, 2023
1 parent e63bb04 commit 0260aeb
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 5 deletions.
40 changes: 36 additions & 4 deletions GPU/Common/DrawEngineCommon.cpp
Expand Up @@ -678,6 +678,38 @@ uint64_t DrawEngineCommon::ComputeHash() {
return fullhash;
}

// Fast path for merging a non-indexed draw into the most recently queued vertex batch.
//
// Instead of queuing a new DeferredVerts entry, this grows the vertex count of the last
// entry in drawVerts_ and queues only a new DeferredInds entry that points at the added
// range. The caller is responsible for making sure the new vertices directly follow the
// previous ones in memory, since no vertex pointer is taken here.
//
// prim, cullMode, vertexCount: stored on the new DeferredInds entry.
// vertTypeID: only inspected for skinning weights; skinned draws are not supported here.
// bytesRead: on success, receives vertexCount * dec_->VertexSize(). Left untouched on failure.
//
// Returns true if the draw was merged. Returns false without modifying any state if it
// could not be, in which case the caller should fall back to a regular SubmitPrim.
bool DrawEngineCommon::ExtendNonIndexedPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
	// No free deferred-index slot, or the vertex budget would be exceeded.
	if (numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
		return false;
	}

	// There must be an already-queued vertex batch to extend. This used to be checked with a
	// debug-only assert; if the queue is empty, drawVerts_[numDrawVerts_ - 1] below would be an
	// out-of-bounds access, so refuse and let the caller do a normal submit.
	if (numDrawVerts_ <= 0) {
		return false;
	}

	bool applySkin = (vertTypeID & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode;
	if (applySkin) {
		// TODO: Support this somehow.
		return false;
	}

	*bytesRead = vertexCount * dec_->VertexSize();

	// The batch being extended is always the last one queued.
	DeferredVerts &dv = drawVerts_[numDrawVerts_ - 1];

	DeferredInds &di = drawInds_[numDrawInds_++];
	di.inds = nullptr;  // Non-indexed draw, so there is no index buffer.
	di.indexType = 0;
	di.prim = prim;
	di.cullMode = cullMode;
	di.vertexCount = vertexCount;
	di.vertDecodeIndex = numDrawVerts_ - 1;
	// The new vertices start right after the ones already in the batch.
	di.offset = dv.vertexCount;

	dv.vertexCount += vertexCount;
	dv.indexUpperBound = dv.vertexCount - 1;
	vertexCountInDrawCalls_ += vertexCount;
	return true;
}

// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_VERTS || numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
Expand Down Expand Up @@ -750,15 +782,15 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti

vertexCountInDrawCalls_ += vertexCount;

if (applySkin) {
DecodeVerts(decoded_);
}

if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
// This prevents issues with consecutive self-renders in Ridge Racer.
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
DispatchFlush();
}

if (applySkin) {
DecodeVerts(decoded_);
}
}

void DrawEngineCommon::DecodeVerts(u8 *dest) {
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/DrawEngineCommon.h
Expand Up @@ -104,6 +104,7 @@ class DrawEngineCommon {

bool TestBoundingBox(const void *control_points, const void *inds, int vertexCount, u32 vertType);

// Tries to merge a non-indexed draw into the most recently queued vertex batch.
// Returns false if that's not possible, in which case the caller should use SubmitPrim instead.
bool ExtendNonIndexedPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
void SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
template<class Surface>
void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope);
Expand Down
21 changes: 20 additions & 1 deletion GPU/GPUCommonHW.cpp
Expand Up @@ -967,6 +967,8 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {

const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
const void *inds = nullptr;

bool canExtend = true;
u32 vertexType = gstate.vertType;
if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
u32 indexAddr = gstate_c.indexAddr;
Expand All @@ -975,6 +977,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
return;
}
inds = Memory::GetPointerUnchecked(indexAddr);
canExtend = false;
}

int bytesRead = 0;
Expand Down Expand Up @@ -1017,19 +1020,33 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
if (IsTrianglePrim(newPrim) != isTriangle)
goto bail; // Can't join over this boundary. Might as well exit and get this on the next time around.
// TODO: more efficient updating of verts/inds

u32 count = data & 0xFFFF;
if (canExtend) {
// Non-indexed draws can be cheaply merged if vertexAddr hasn't changed, that means the vertices
// are consecutive in memory.
_dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE);
if (drawEngineCommon_->ExtendNonIndexedPrim(newPrim, count, vertTypeID, cullMode, &bytesRead)) {
gstate_c.vertexAddr += bytesRead;
totalVertCount += count;
break;
}
}

// Failed, or can't extend? Do a normal submit.
verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
inds = nullptr;
if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
u32 count = data & 0xFFFF;
drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead);
AdvanceVerts(vertexType, count, bytesRead);
totalVertCount += count;
break;
}
case GE_CMD_VERTEXTYPE:
{
canExtend = false; // TODO: Might support extending between some vertex types in the future.
uint32_t diff = data ^ vertexType;
// don't mask upper bits, vertexType is unmasked
if (diff & vtypeCheckMask) {
Expand All @@ -1043,6 +1060,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
case GE_CMD_VADDR:
gstate.cmdmem[GE_CMD_VADDR] = data;
gstate_c.vertexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF);
canExtend = false;
break;
case GE_CMD_IADDR:
gstate.cmdmem[GE_CMD_IADDR] = data;
Expand All @@ -1051,6 +1069,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
case GE_CMD_OFFSETADDR:
gstate.cmdmem[GE_CMD_OFFSETADDR] = data;
gstate_c.offsetAddr = data << 8;
canExtend = false;
break;
case GE_CMD_BASE:
gstate.cmdmem[GE_CMD_BASE] = data;
Expand Down

0 comments on commit 0260aeb

Please sign in to comment.