Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache framebuffer copies (for self-texturing) until the next TexFlush GPU instruction #17032

Merged
merged 4 commits into from
Mar 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GPU/Common/DepthBufferCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ bool FramebufferManagerCommon::ReadbackDepthbuffer(Draw::Framebuffer *fbo, int x
}

shaderManager_->DirtyLastShader();
auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width() * scaleX, fbo->Height() * scaleY);
auto *blitFBO = GetTempFBO(TempFBO::Z_COPY, fbo->Width() * scaleX, fbo->Height() * scaleY);
draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackDepthbufferSync");
Draw::Viewport viewport = { 0.0f, 0.0f, (float)destW, (float)destH, 0.0f, 1.0f };
draw_->SetViewport(viewport);
Expand Down
34 changes: 32 additions & 2 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer
// TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?)
}

DiscardFramebufferCopy();

// We already have it!
} else if (vfb != currentRenderVfb_) {
// Use it as a render target.
Expand All @@ -562,6 +564,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer
NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth);
CopyToColorFromOverlappingFramebuffers(vfb);
gstate_c.usingDepth = false; // reset depth buffer tracking

DiscardFramebufferCopy();
} else {
// Something changed, but we still got the same framebuffer we were already rendering to.
// Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated
Expand Down Expand Up @@ -928,6 +932,7 @@ void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
}

// Wipe some pointers
DiscardFramebufferCopy();
if (currentRenderVfb_ == v)
currentRenderVfb_ = nullptr;
if (displayFramebuf_ == v)
Expand Down Expand Up @@ -1233,13 +1238,27 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF
// Self-texturing, need a copy currently (some backends can potentially support it though).
WARN_LOG_ONCE(selfTextureCopy, G3D, "Attempting to texture from current render target (src=%08x / target=%08x / flags=%d), making a copy", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
// TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
if (currentFramebufferCopy_) {
// We have a copy already that hasn't been invalidated, let's keep using it.
draw_->BindFramebufferAsTexture(currentFramebufferCopy_, stage, Draw::FB_COLOR_BIT, layer);
return true;
}

Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight);
if (renderCopy) {
VirtualFramebuffer copyInfo = *framebuffer;
copyInfo.fbo = renderCopy;
CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags, layer);

bool partial = false;
CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags, layer, &partial);
RebindFramebuffer("After BindFramebufferAsColorTexture");
draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, layer);

// Only cache the copy if it wasn't a partial copy.
// TODO: Improve on this.
if (!partial) {
currentFramebufferCopy_ = renderCopy;
}
gpuStats.numCopiesForSelfTex++;
} else {
// Failed to get temp FBO? Weird.
Expand All @@ -1262,14 +1281,17 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF
}
}

void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer) {
void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial) {
int x = 0;
int y = 0;
int w = src->drawnWidth;
int h = src->drawnHeight;

*partial = false;

// If max is not > min, we probably could not detect it. Skip.
// See the vertex decoder, where this is updated.
// TODO: We're currently not hitting this path in Dante. See #17032
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably not using vertex coords that get checked, so maxU probably equals minU. We don't currently set the range for all vertex types, just 16-bit through-mode texcoords, IIRC. This has been true forever, initially to avoid any perf impact to vertex decode for the few games this was initially helping.

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right. Possibly it would always be worth it to find the min/max in through mode, although then this change would need more sophistication. So, maybe one day :)

if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) == BINDFBCOLOR_MAY_COPY_WITH_UV && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) {
x = std::max(gstate_c.vertBounds.minU, (u16)0);
y = std::max(gstate_c.vertBounds.minV, (u16)0);
Expand All @@ -1287,6 +1309,9 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer
}

if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) {
if (x != 0 || y != 0 || w < src->drawnWidth || h < src->drawnHeight) {
*partial = true;
}
BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "CopyFBForColorTexture");
}
}
Expand Down Expand Up @@ -1426,6 +1451,7 @@ void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, int
// PresentationCommon sets all kinds of state, we can't rely on anything.
gstate_c.Dirty(DIRTY_ALL);

DiscardFramebufferCopy();
currentRenderVfb_ = nullptr;
}

Expand Down Expand Up @@ -1583,10 +1609,12 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
// This may get called mid-draw if the game uses an immediate flip.
// PresentationCommon sets all kinds of state, we can't rely on anything.
gstate_c.Dirty(DIRTY_ALL);
DiscardFramebufferCopy();
currentRenderVfb_ = nullptr;
}

void FramebufferManagerCommon::DecimateFBOs() {
DiscardFramebufferCopy();
currentRenderVfb_ = nullptr;

for (auto iter : fbosToDelete_) {
Expand Down Expand Up @@ -1743,6 +1771,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
} else {
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
}
DiscardFramebufferCopy();
currentRenderVfb_ = vfb;

if (!vfb->fbo) {
Expand Down Expand Up @@ -2544,6 +2573,7 @@ void FramebufferManagerCommon::NotifyConfigChanged() {
}

void FramebufferManagerCommon::DestroyAllFBOs() {
DiscardFramebufferCopy();
currentRenderVfb_ = nullptr;
displayFramebuf_ = nullptr;
prevDisplayFramebuf_ = nullptr;
Expand Down
10 changes: 9 additions & 1 deletion GPU/Common/FramebufferManagerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ enum class TempFBO {
BLIT,
// For copies of framebuffers (e.g. shader blending.)
COPY,
// Used for copies when setting color to depth.
Z_COPY,
// Used to copy stencil data, means we need a stencil backing.
STENCIL,
};
Expand Down Expand Up @@ -466,6 +468,10 @@ class FramebufferManagerCommon {
return msaaLevel_;
}

// Forgets the cached self-texturing copy by resetting currentFramebufferCopy_ to nullptr.
// While that pointer is non-null, BindFramebufferAsColorTexture rebinds the cached copy
// instead of making a new one, so calling this forces a fresh copy on the next
// self-texturing bind.
// This only clears the pointer; it does not release the framebuffer object itself.
void DiscardFramebufferCopy() {
currentFramebufferCopy_ = nullptr;
}

protected:
virtual void ReadbackFramebuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode);
// Used for when a shader is required, such as GLES.
Expand All @@ -486,7 +492,7 @@ class FramebufferManagerCommon {
// Used by ReadFramebufferToMemory and later framebuffer block copies
void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag);

void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer);
void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial);

void EstimateDrawingSize(u32 fb_address, int fb_stride, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int &drawing_width, int &drawing_height);

Expand Down Expand Up @@ -552,6 +558,8 @@ class FramebufferManagerCommon {

VirtualFramebuffer *currentRenderVfb_ = nullptr;

Draw::Framebuffer *currentFramebufferCopy_ = nullptr;

// The range of PSP memory that may contain FBOs. So we can skip iterating.
u32 framebufRangeEnd_ = 0;

Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/StencilBufferGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ bool FramebufferManagerGLES::ReadbackStencilbuffer(Draw::Framebuffer *fbo, int x
}

shaderManager_->DirtyLastShader();
auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height());
auto *blitFBO = GetTempFBO(TempFBO::Z_COPY, fbo->Width(), fbo->Height());
draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackStencilbufferSync");
Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f };
draw_->SetViewport(viewport);
Expand Down
9 changes: 8 additions & 1 deletion GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

// Ignored commands
{ GE_CMD_TEXFLUSH, 0 },
{ GE_CMD_TEXFLUSH, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TexFlush },
{ GE_CMD_TEXSYNC, 0 },

// These are just nop or part of other later commands.
Expand Down Expand Up @@ -1619,6 +1619,13 @@ void GPUCommonHW::Execute_BoneMtxData(u32 op, u32 diff) {
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}

// Handler for GE_CMD_TEXFLUSH, registered in commonCommandTable with FLAG_EXECUTE.
// Neither op nor diff is read; the command simply invalidates the framebuffer
// manager's cached framebuffer copy.
void GPUCommonHW::Execute_TexFlush(u32 op, u32 diff) {
// Games call this when they need the effect of drawing to be visible to texturing.
// And for a bunch of other reasons, but either way, this is what we need to do.
// It's possible we could also use this as a hint for the texture cache somehow.
// Dropping the cached copy means the next self-texturing bind makes a new copy
// of the render target rather than reusing the earlier one.
framebufferManager_->DiscardFramebufferCopy();
}

size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
return snprintf(buffer, size,
Expand Down
2 changes: 2 additions & 0 deletions GPU/GPUCommonHW.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ class GPUCommonHW : public GPUCommon {
void Execute_BoneMtxNum(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);

void Execute_TexFlush(u32 op, u32 diff);

typedef void (GPUCommonHW::*CmdFunc)(u32 op, u32 diff);

void FastRunLoop(DisplayList &list) override;
Expand Down