Skip to content

Commit

Permalink
DrawEngine; Convert strip sequences in a tight loop
Browse files: browse the repository at this point in the history
  • Loading branch information
hrydgard committed Oct 6, 2023
1 parent 15df71c commit cd35252
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 38 deletions.
61 changes: 39 additions & 22 deletions GPU/Common/DrawEngineCommon.cpp
Expand Up @@ -610,7 +610,7 @@ u32 DrawEngineCommon::ComputeMiniHash() {
}
for (int i = 0; i < numDrawInds_; i += step) {
const DeferredInds &di = drawInds_[i];
if (di.inds) {
if (di.indexType != 0) {
fullhash += ComputeMiniHashRange(di.inds, indexSize * di.vertexCount);
}
}
Expand Down Expand Up @@ -638,8 +638,10 @@ uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
}
for (int j = 0; j < numDrawInds_; j++) {
u32 dhash = dcid;
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawInds_[j].inds, 19);
dcid = lowbias32_r(__rotl(dhash ^ (u32)drawInds_[j].indexType, 7));
if (drawInds_[j].inds) {
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawInds_[j].inds, 19);
dcid = lowbias32_r(__rotl(dhash ^ (u32)drawInds_[j].indexType, 7));
}
}
return dcid;
}
Expand Down Expand Up @@ -678,31 +680,46 @@ uint64_t DrawEngineCommon::ComputeHash() {
return fullhash;
}

bool DrawEngineCommon::ExtendNonIndexedPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
return false;
}
int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, u32 vertTypeID, int cullMode, int *bytesRead, bool isTriangle) {
const uint32_t *start = cmd;
int prevDrawVerts = numDrawVerts_ - 1;
DeferredVerts &dv = drawVerts_[prevDrawVerts];
int offset = dv.vertexCount;

_dbg_assert_(numDrawInds_ < MAX_DEFERRED_DRAW_INDS);
_dbg_assert_(numDrawVerts_ > 0);
*bytesRead = vertexCount * dec_->VertexSize();

DeferredInds &di = drawInds_[numDrawInds_++];
di.inds = nullptr;
di.indexType = 0;
di.prim = prim;
di.cullMode = cullMode;
di.vertexCount = vertexCount;
di.vertDecodeIndex = numDrawVerts_ - 1;
while (true) {
uint32_t data = *cmd;
if ((data & 0xFFF80000) != 0x04000000) {
break;
}
GEPrimitiveType newPrim = static_cast<GEPrimitiveType>((data >> 16) & 7);
if (IsTrianglePrim(newPrim) != isTriangle)
break;
int vertexCount = data & 0xFFFF;
if (numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
break;
}
DeferredInds &di = drawInds_[numDrawInds_++];
di.indexType = 0;
di.prim = newPrim;
di.cullMode = cullMode;
di.vertexCount = vertexCount;
di.vertDecodeIndex = prevDrawVerts;
di.offset = offset;
offset += vertexCount;
cmd++;
}

DeferredVerts &dv = drawVerts_[numDrawVerts_ - 1];
int offset = dv.vertexCount;
di.offset = offset;
dv.vertexCount += vertexCount;
dv.indexUpperBound = dv.vertexCount - 1;
vertexCountInDrawCalls_ += vertexCount;
_dbg_assert_(cmd != start);

return true;
int totalCount = offset - dv.vertexCount;
dv.vertexCount = offset;
dv.indexUpperBound = dv.vertexCount - 1;
vertexCountInDrawCalls_ += totalCount;
*bytesRead = totalCount * dec_->VertexSize();
return cmd - start;
}

// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/DrawEngineCommon.h
Expand Up @@ -111,7 +111,7 @@ class DrawEngineCommon {
}
}

bool ExtendNonIndexedPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
int ExtendNonIndexedPrim(const uint32_t *cmd, u32 vertTypeID, int cullMode, int *bytesRead, bool isTriangle);
void SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
template<class Surface>
void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope);
Expand Down
23 changes: 12 additions & 11 deletions GPU/GPUCommon.h
Expand Up @@ -67,6 +67,18 @@ struct TransformedVertex {
}
};

// Reports whether a GE primitive type is handled as a triangle-class primitive:
// anything ordered after GE_PRIM_LINE_STRIP, with GE_PRIM_RECTANGLES excluded.
inline bool IsTrianglePrim(GEPrimitiveType prim) {
	// TODO: KEEP_PREVIOUS ends up classified as a triangle primitive by this test.
	// That is a long-standing quirk, not something introduced here.
	//
	// A table-free alternative would be a bitmask lookup, one bit per primitive type
	// ({ false, false, false, true, true, true, false, true } packs to 0xB8):
	//   return (0xB8U >> (u8)prim) & 1;
	// It's not clear that this would actually be faster, so the comparisons stay.
	if (prim == GE_PRIM_RECTANGLES) {
		return false;
	}
	return prim > GE_PRIM_LINE_STRIP;
}


class GPUCommon : public GPUInterface, public GPUDebugInterface {
public:
GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
Expand Down Expand Up @@ -219,17 +231,6 @@ class GPUCommon : public GPUInterface, public GPUDebugInterface {

virtual void CheckRenderResized() {}

// Reports whether a GE primitive type is handled as a triangle-class primitive:
// anything ordered after GE_PRIM_LINE_STRIP, with GE_PRIM_RECTANGLES excluded.
inline bool IsTrianglePrim(GEPrimitiveType prim) const {
	// TODO: KEEP_PREVIOUS ends up classified as a triangle primitive by this test.
	// That is a long-standing quirk, not something introduced here.
	//
	// A table-free alternative would be a bitmask lookup, one bit per primitive type
	// ({ false, false, false, true, true, true, false, true } packs to 0xB8):
	//   return (0xB8U >> (u8)prim) & 1;
	// It's not clear that this would actually be faster, so the comparisons stay.
	if (prim == GE_PRIM_RECTANGLES) {
		return false;
	}
	return prim > GE_PRIM_LINE_STRIP;
}

void SetDrawType(DrawType type, GEPrimitiveType prim) {
if (type != lastDraw_) {
// We always flush when drawing splines/beziers so no need to do so here
Expand Down
11 changes: 7 additions & 4 deletions GPU/GPUCommonHW.cpp
Expand Up @@ -1026,11 +1026,14 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
// Non-indexed draws can be cheaply merged if vertexAddr hasn't changed, that means the vertices
// are consecutive in memory.
_dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE);
if (drawEngineCommon_->ExtendNonIndexedPrim(newPrim, count, vertTypeID, cullMode, &bytesRead)) {
gstate_c.vertexAddr += bytesRead;
totalVertCount += count;
break;
int commandsExecuted = drawEngineCommon_->ExtendNonIndexedPrim(src, vertTypeID, cullMode, &bytesRead, isTriangle);
if (!commandsExecuted) {
goto bail;
}
src += commandsExecuted - 1;
gstate_c.vertexAddr += bytesRead;
totalVertCount += count;
break;
}

// Failed, or can't extend? Do a normal submit.
Expand Down

0 comments on commit cd35252

Please sign in to comment.