Skip to content

Commit

Permalink
Frustum-cull small draws
Browse files Browse the repository at this point in the history
Some games do a poor job of culling stuff, and some transparent
sprites can be very expensive if they cause a copy.
Skipping them if outside the viewport makes sense in that case.

One example is the flame sprites in #17797.

Additionally, we should be able to cull through-mode draws easily, but this
commit doesn't even try.
  • Loading branch information
hrydgard committed Dec 9, 2023
1 parent 3e20fab commit 0905b6a
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 5 deletions.
1 change: 0 additions & 1 deletion Common/UI/View.cpp
Expand Up @@ -620,7 +620,6 @@ CollapsibleHeader::CollapsibleHeader(bool *toggle, const std::string &text, Layo

void CollapsibleHeader::Draw(UIContext &dc) {
Style style = dc.theme->itemStyle;
style.background.color = 0;
if (HasFocus()) style = dc.theme->itemFocusedStyle;
if (down_) style = dc.theme->itemDownStyle;
if (!IsEnabled()) style = dc.theme->itemDisabledStyle;
Expand Down
25 changes: 25 additions & 0 deletions GPU/Common/DrawEngineCommon.cpp
Expand Up @@ -670,6 +670,31 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
return cmd - start;
}

// Counterpart to SubmitPrim for a draw that is not going to be submitted.
// Keeps prevPrim_ and the cached vertex decoder in sync with the incoming
// primitive, and reports through bytesRead how many bytes of vertex data the
// draw covers (vertexCount times the decoder's vertex size).
void DrawEngineCommon::SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
	// An incompatible primitive type can't continue the pending batch, so flush it.
	const bool compatibleWithPrevious = indexGen.PrimCompatible(prevPrim_, prim);
	if (!compatibleWithPrevious) {
		DispatchFlush();
	}

	// Not exactly right if we flushed above, since prims can straddle previous calls,
	// but it generally works for common usage.
	if (prim != GE_PRIM_KEEP_PREVIOUS) {
		prevPrim_ = prim;
	} else {
		// There must be some previous primitive to keep; fall back to POINTS (0) if none.
		if (prevPrim_ == GE_PRIM_INVALID) {
			prevPrim_ = GE_PRIM_POINTS;
		}
		prim = prevPrim_;
	}

	// Look up a vertex decoder when we have none yet or the vertex type changed.
	const bool decoderOutdated = !dec_ || vertTypeID != lastVType_;
	if (decoderOutdated) {
		dec_ = GetVertexDecoder(vertTypeID);
		lastVType_ = vertTypeID;
	}

	*bytesRead = vertexCount * dec_->VertexSize();
}

// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawVerts_ >= MAX_DEFERRED_DRAW_VERTS || numDrawInds_ >= MAX_DEFERRED_DRAW_INDS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
Expand Down
2 changes: 2 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Expand Up @@ -113,6 +113,8 @@ class DrawEngineCommon {

int ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle);
bool SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead);
void SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);

template<class Surface>
void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope);
void ClearSplineBezierWeights();
Expand Down
2 changes: 2 additions & 0 deletions GPU/GPU.h
Expand Up @@ -76,6 +76,7 @@ struct GPUStatistics {
void ResetFrame() {
numDrawCalls = 0;
numVertexDecodes = 0;
numCulledDraws = 0;
numDrawSyncs = 0;
numListSyncs = 0;
numVertsSubmitted = 0;
Expand Down Expand Up @@ -111,6 +112,7 @@ struct GPUStatistics {
// Per frame statistics
int numDrawCalls;
int numVertexDecodes;
int numCulledDraws;
int numDrawSyncs;
int numListSyncs;
int numFlushes;
Expand Down
54 changes: 50 additions & 4 deletions GPU/GPUCommonHW.cpp
Expand Up @@ -989,9 +989,36 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
int cullMode = gstate.getCullMode();

uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
if (!drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, true, &bytesRead)) {

#define MAX_CULL_CHECK_COUNT 6

#define PASSES_CULLING ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHT_MASK | GE_VTYPE_IDX_MASK)) || count > MAX_CULL_CHECK_COUNT)

// If certain conditions are true, do frustum culling.
bool passCulling = PASSES_CULLING;
if (!passCulling) {
// Do software culling.
if (drawEngineCommon_->TestBoundingBox(verts, inds, count, vertexType)) {
passCulling = true;
} else {
gpuStats.numCulledDraws++;
}
}

// If the first one in a batch passes, let's assume the whole batch passes.
// Cuts down on checking, while not losing that much efficiency.
bool onePassed = false;
if (passCulling) {
if (!drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, true, &bytesRead)) {
canExtend = false;
}
onePassed = true;
} else {
// Still need to advance bytesRead.
drawEngineCommon_->SkipPrim(prim, count, vertTypeID, &bytesRead);
canExtend = false;
}

// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
// Some games rely on this, they don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.
Expand Down Expand Up @@ -1027,7 +1054,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
bool clockwise = !gstate.isCullEnabled() || gstate.getCullMode() == cullMode;
if (canExtend) {
// Non-indexed draws can be cheaply merged if vertexAddr hasn't changed, that means the vertices
// are consecutive in memory.
// are consecutive in memory. We also ignore culling here.
_dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE);
int commandsExecuted = drawEngineCommon_->ExtendNonIndexedPrim(src, stall, vertTypeID, clockwise, &bytesRead, isTriangle);
if (!commandsExecuted) {
Expand All @@ -1047,7 +1074,25 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
// We can extend again after submitting a normal draw.
canExtend = isTriangle;
}
if (!drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, clockwise, &bytesRead)) {

bool passCulling = onePassed || PASSES_CULLING;
if (!passCulling) {
// Do software culling.
if (drawEngineCommon_->TestBoundingBox(verts, inds, count, vertexType)) {
passCulling = true;
} else {
gpuStats.numCulledDraws++;
}
}
if (passCulling) {
if (!drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, clockwise, &bytesRead)) {
canExtend = false;
}
// As soon as one passes, assume we don't need to check the rest of this batch.
onePassed = true;
} else {
// Still need to advance bytesRead.
drawEngineCommon_->SkipPrim(newPrim, count, vertTypeID, &bytesRead);
canExtend = false;
}
AdvanceVerts(vertexType, count, bytesRead);
Expand Down Expand Up @@ -1691,7 +1736,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
return snprintf(buffer, size,
"DL processing time: %0.2f ms, %d drawsync, %d listsync\n"
"Draw: %d (%d dec), flushes %d, clears %d, bbox jumps %d (%d updates)\n"
"Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n"
"Vertices: %d drawn: %d\n"
"FBOs active: %d (evaluations: %d)\n"
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
Expand All @@ -1705,6 +1750,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
gpuStats.numListSyncs,
gpuStats.numDrawCalls,
gpuStats.numVertexDecodes,
gpuStats.numCulledDraws,
gpuStats.numFlushes,
gpuStats.numClears,
gpuStats.numBBOXJumps,
Expand Down

0 comments on commit 0905b6a

Please sign in to comment.