diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp
index 8a4fc0fd9860..2444d04b2c98 100644
--- a/GPU/Common/FramebufferManagerCommon.cpp
+++ b/GPU/Common/FramebufferManagerCommon.cpp
@@ -1208,7 +1208,6 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int
 		vfb ? vfb->bufferHeight : g_display.pixel_yres,
 		u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);
 
-	gpuStats.numUploads++;
 	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
 
 	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
@@ -1324,6 +1323,19 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
 		}
 	}
 
+	int bpp = BufferFormatBytesPerPixel(srcPixelFormat);
+	int srcStrideInBytes = srcStride * bpp;
+	int widthInBytes = width * bpp;
+
+	// Hash the image row by row: only the first widthInBytes of each srcStrideInBytes-sized row are pixels, the rest is padding.
+	XXH3_state_t *hashState = XXH3_createState();
+	XXH3_64bits_reset(hashState);
+	for (int y = 0; y < height; y++) {
+		XXH3_64bits_update(hashState, srcPixels + y * srcStrideInBytes, widthInBytes);
+	}
+	uint64_t imageHash = XXH3_64bits_digest(hashState);
+	XXH3_freeState(hashState);
+
 	// TODO: We can just change the texture format and flip some bits around instead of this.
 	// Could share code with the texture cache perhaps.
 	auto generateTexture = [&](uint8_t *data, const uint8_t *initData, uint32_t w, uint32_t h, uint32_t d, uint32_t byteStride, uint32_t sliceByteStride) {
@@ -1396,16 +1408,28 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
 
 	int frameNumber = draw_->GetFrameCount();
 
-	// Look for a matching texture we can re-use.
+	// First look for an exact match (including contents hash) that we can re-use.
+	for (auto &iter : drawPixelsCache_) {
+		if (iter.contentsHash == imageHash && iter.tex->Width() == width && iter.tex->Height() == height && iter.tex->Format() == texFormat) {
+			iter.frameNumber = frameNumber;
+			gpuStats.numCachedUploads++;
+			return iter.tex;
+		}
+	}
+
+	// Then, look for an alternative one that's not been used recently that we can overwrite.
 	for (auto &iter : drawPixelsCache_) {
 		if (iter.frameNumber >= frameNumber - 3 || iter.tex->Width() != width || iter.tex->Height() != height || iter.tex->Format() != texFormat) {
 			continue;
 		}
 
 		// OK, current one seems good, let's use it (and mark it used).
+		gpuStats.numUploads++;
 		draw_->UpdateTextureLevels(iter.tex, &srcPixels, generateTexture, 1);
 		// NOTE: numFlips is no good - this is called every frame when paused sometimes!
 		iter.frameNumber = frameNumber;
+		// We need to update the hash for future matching.
+		iter.contentsHash = imageHash;
 		return iter.tex;
 	}
 
@@ -1435,8 +1459,9 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
 
 	// INFO_LOG(G3D, "Creating drawPixelsCache texture: %dx%d", tex->Width(), tex->Height());
 
-	DrawPixelsEntry entry{ tex, frameNumber };
+	DrawPixelsEntry entry{ tex, imageHash, frameNumber };
 	drawPixelsCache_.push_back(entry);
+	gpuStats.numUploads++;
 	return tex;
 }
 
diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h
index 20bf5a75ed08..76fa57852372 100644
--- a/GPU/Common/FramebufferManagerCommon.h
+++ b/GPU/Common/FramebufferManagerCommon.h
@@ -269,6 +269,7 @@ class DrawContext;
 
 struct DrawPixelsEntry {
 	Draw::Texture *tex;
+	uint64_t contentsHash;
 	int frameNumber;
 };
 
diff --git a/GPU/GPU.h b/GPU/GPU.h
index cbd148078cf0..7d4d4d1c0a07 100644
--- a/GPU/GPU.h
+++ b/GPU/GPU.h
@@ -92,6 +92,7 @@ struct GPUStatistics {
 		numBlockingReadbacks = 0;
 		numReadbacks = 0;
 		numUploads = 0;
+		numCachedUploads = 0;
 		numDepal = 0;
 		numClears = 0;
 		numDepthCopies = 0;
@@ -126,6 +127,7 @@ struct GPUStatistics {
 	int numBlockingReadbacks;
 	int numReadbacks;
 	int numUploads;
+	int numCachedUploads;
 	int numDepal;
 	int numClears;
 	int numDepthCopies;
diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp
index 4491d08369b3..8170c0658d1a 100644
--- a/GPU/GPUCommonHW.cpp
+++ b/GPU/GPUCommonHW.cpp
@@ -1688,7 +1688,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
 		"Vertices: %d drawn: %d\n"
 		"FBOs active: %d (evaluations: %d)\n"
 		"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
-		"readbacks %d (%d non-block), uploads %d, depal %d\n"
+		"readbacks %d (%d non-block), upload %d (cached %d), depal %d\n"
 		"block transfers: %d\n"
 		"replacer: tracks %d references, %d unique textures\n"
 		"Cpy: depth %d, color %d, reint %d, blend %d, self %d\n"
@@ -1713,6 +1713,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
 		gpuStats.numBlockingReadbacks,
 		gpuStats.numReadbacks,
 		gpuStats.numUploads,
+		gpuStats.numCachedUploads,
 		gpuStats.numDepal,
 		gpuStats.numBlockTransfers,
 		gpuStats.numReplacerTrackedTex,
diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h
index cdb7cbce58af..e425dbeef791 100644
--- a/GPU/ge_constants.h
+++ b/GPU/ge_constants.h
@@ -460,10 +460,10 @@ inline bool IsTextureFormat16Bit(GETextureFormat tfmt) {
 
 inline int BufferFormatBytesPerPixel(GEBufferFormat format) {
 	switch (format) {
-	case GE_FORMAT_8888: return 4;  // applies to depth as well.
+	case GE_FORMAT_8888: return 4;
 	case GE_FORMAT_CLUT8: return 1;
 	default:
-		return 2;
+		return 2;  // works for depth as well as the 16-bit color formats.
 	}
 }
 