diff --git a/GPU/Common/DepthBufferCommon.cpp b/GPU/Common/DepthBufferCommon.cpp index 46a2ecfb6fcb..a29c99577575 100644 --- a/GPU/Common/DepthBufferCommon.cpp +++ b/GPU/Common/DepthBufferCommon.cpp @@ -204,7 +204,7 @@ bool FramebufferManagerCommon::ReadbackDepthbuffer(Draw::Framebuffer *fbo, int x } shaderManager_->DirtyLastShader(); - auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width() * scaleX, fbo->Height() * scaleY); + auto *blitFBO = GetTempFBO(TempFBO::Z_COPY, fbo->Width() * scaleX, fbo->Height() * scaleY); draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackDepthbufferSync"); Draw::Viewport viewport = { 0.0f, 0.0f, (float)destW, (float)destH, 0.0f, 1.0f }; draw_->SetViewport(viewport); diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 91005fd71ca7..e21bc35e9378 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -546,6 +546,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer // TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?) } + DiscardFramebufferCopy(); + // We already have it! } else if (vfb != currentRenderVfb_) { // Use it as a render target. @@ -562,6 +564,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth); CopyToColorFromOverlappingFramebuffers(vfb); gstate_c.usingDepth = false; // reset depth buffer tracking + + DiscardFramebufferCopy(); } else { // Something changed, but we still got the same framebuffer we were already rendering to. // Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated @@ -928,6 +932,7 @@ void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) { } // Wipe some pointers + DiscardFramebufferCopy(); if (currentRenderVfb_ == v) currentRenderVfb_ = nullptr; if (displayFramebuf_ == v) @@ -1233,13 +1238,27 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF // Self-texturing, need a copy currently (some backends can potentially support it though). WARN_LOG_ONCE(selfTextureCopy, G3D, "Attempting to texture from current render target (src=%08x / target=%08x / flags=%d), making a copy", framebuffer->fb_address, currentRenderVfb_->fb_address, flags); // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size. + if (currentFramebufferCopy_) { + // We have a copy already that hasn't been invalidated, let's keep using it. + draw_->BindFramebufferAsTexture(currentFramebufferCopy_, stage, Draw::FB_COLOR_BIT, layer); + return true; + } + Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight); if (renderCopy) { VirtualFramebuffer copyInfo = *framebuffer; copyInfo.fbo = renderCopy; - CopyFramebufferForColorTexture(©Info, framebuffer, flags, layer); + + bool partial = false; + CopyFramebufferForColorTexture(©Info, framebuffer, flags, layer, &partial); RebindFramebuffer("After BindFramebufferAsColorTexture"); draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, layer); + + // Only cache the copy if it wasn't a partial copy. + // TODO: Improve on this. + if (!partial) { + currentFramebufferCopy_ = renderCopy; + } gpuStats.numCopiesForSelfTex++; } else { // Failed to get temp FBO? Weird. @@ -1262,14 +1281,17 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF } } -void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer) { +void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial) { int x = 0; int y = 0; int w = src->drawnWidth; int h = src->drawnHeight; + *partial = false; + // If max is not > min, we probably could not detect it. Skip. // See the vertex decoder, where this is updated. + // TODO: We're currently not hitting this path in Dante. See #17032 if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) == BINDFBCOLOR_MAY_COPY_WITH_UV && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) { x = std::max(gstate_c.vertBounds.minU, (u16)0); y = std::max(gstate_c.vertBounds.minV, (u16)0); @@ -1287,6 +1309,9 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer } if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) { + if (x != 0 || y != 0 || w < src->drawnWidth || h < src->drawnHeight) { + *partial = true; + } BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "CopyFBForColorTexture"); } } @@ -1426,6 +1451,7 @@ void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, int // PresentationCommon sets all kinds of state, we can't rely on anything. gstate_c.Dirty(DIRTY_ALL); + DiscardFramebufferCopy(); currentRenderVfb_ = nullptr; } @@ -1583,10 +1609,12 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) { // This may get called mid-draw if the game uses an immediate flip. // PresentationCommon sets all kinds of state, we can't rely on anything. gstate_c.Dirty(DIRTY_ALL); + DiscardFramebufferCopy(); currentRenderVfb_ = nullptr; } void FramebufferManagerCommon::DecimateFBOs() { + DiscardFramebufferCopy(); currentRenderVfb_ = nullptr; for (auto iter : fbosToDelete_) { @@ -1743,6 +1771,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, } else { draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO"); } + DiscardFramebufferCopy(); currentRenderVfb_ = vfb; if (!vfb->fbo) { @@ -2544,6 +2573,7 @@ void FramebufferManagerCommon::NotifyConfigChanged() { } void FramebufferManagerCommon::DestroyAllFBOs() { + DiscardFramebufferCopy(); currentRenderVfb_ = nullptr; displayFramebuf_ = nullptr; prevDisplayFramebuf_ = nullptr; diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 672130f7aa01..d0c83b4d6c11 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -217,6 +217,8 @@ enum class TempFBO { BLIT, // For copies of framebuffers (e.g. shader blending.) COPY, + // Used for copies when setting color to depth. + Z_COPY, // Used to copy stencil data, means we need a stencil backing. STENCIL, }; @@ -466,6 +468,10 @@ class FramebufferManagerCommon { return msaaLevel_; } + void DiscardFramebufferCopy() { + currentFramebufferCopy_ = nullptr; + } + protected: virtual void ReadbackFramebuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode); // Used for when a shader is required, such as GLES. @@ -486,7 +492,7 @@ class FramebufferManagerCommon { // Used by ReadFramebufferToMemory and later framebuffer block copies void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag); - void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer); + void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial); void EstimateDrawingSize(u32 fb_address, int fb_stride, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int &drawing_width, int &drawing_height); @@ -552,6 +558,8 @@ class FramebufferManagerCommon { VirtualFramebuffer *currentRenderVfb_ = nullptr; + Draw::Framebuffer *currentFramebufferCopy_ = nullptr; + // The range of PSP memory that may contain FBOs. So we can skip iterating. u32 framebufRangeEnd_ = 0; diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp index 763e5a95fe76..845cd2b3ac76 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -130,7 +130,7 @@ bool FramebufferManagerGLES::ReadbackStencilbuffer(Draw::Framebuffer *fbo, int x } shaderManager_->DirtyLastShader(); - auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height()); + auto *blitFBO = GetTempFBO(TempFBO::Z_COPY, fbo->Width(), fbo->Height()); draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackStencilbufferSync"); Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f }; draw_->SetViewport(viewport); diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index 5c51cf5e8a67..01d6dd02c2ce 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -287,7 +287,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 }, // Ignored commands - { GE_CMD_TEXFLUSH, 0 }, + { GE_CMD_TEXFLUSH, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TexFlush }, { GE_CMD_TEXSYNC, 0 }, // These are just nop or part of other later commands. @@ -1619,6 +1619,13 @@ void GPUCommonHW::Execute_BoneMtxData(u32 op, u32 diff) { gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24; } +void GPUCommonHW::Execute_TexFlush(u32 op, u32 diff) { + // Games call this when they need the effect of drawing to be visible to texturing. + // And for a bunch of other reasons, but either way, this is what we need to do. + // It's possible we could also use this as a hint for the texture cache somehow. + framebufferManager_->DiscardFramebufferCopy(); +} + size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; return snprintf(buffer, size, diff --git a/GPU/GPUCommonHW.h b/GPU/GPUCommonHW.h index 2e13a1173845..000993be79fc 100644 --- a/GPU/GPUCommonHW.h +++ b/GPU/GPUCommonHW.h @@ -60,6 +60,8 @@ class GPUCommonHW : public GPUCommon { void Execute_BoneMtxNum(u32 op, u32 diff); void Execute_BoneMtxData(u32 op, u32 diff); + void Execute_TexFlush(u32 op, u32 diff); + typedef void (GPUCommonHW::*CmdFunc)(u32 op, u32 diff); void FastRunLoop(DisplayList &list) override;