Skip to content

Commit

Permalink
Merge pull request #13454 from hrydgard/tex-fixes-2
Browse files Browse the repository at this point in the history
More texture cache cleanup. Show bytes of textures hashed per frame in debug stats.
  • Loading branch information
hrydgard committed Sep 20, 2020
2 parents bb09c56 + 2773336 commit c093d85
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 163 deletions.
17 changes: 6 additions & 11 deletions GPU/Common/FramebufferManagerCommon.cpp
Expand Up @@ -449,8 +449,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame

void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
// Notify the texture cache of both the color and depth buffers.
textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED, NOTIFY_FB_DEPTH);
textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);
if (v->fbo) {
v->fbo->Release();
v->fbo = nullptr;
Expand Down Expand Up @@ -497,8 +496,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer
DownloadFramebufferOnSwitch(currentRenderVfb_);
}

textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED, NOTIFY_FB_DEPTH);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);

// Ugly...
if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
Expand All @@ -512,8 +510,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer

void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) {
if (vfbFormatChanged) {
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);
if (vfb->drawnFormat != vfb->format) {
ReformatFramebufferFrom(vfb, vfb->drawnFormat);
}
Expand Down Expand Up @@ -581,8 +578,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe
} else {
if (vfb->fbo) {
// This should only happen very briefly when toggling useBufferedRendering_.
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_DESTROYED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_DESTROYED, NOTIFY_FB_DEPTH);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_DESTROYED);
vfb->fbo->Release();
vfb->fbo = nullptr;
}
Expand All @@ -594,8 +590,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe
gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
}
}
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);

// ugly... is all this needed?
if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
Expand Down Expand Up @@ -1376,7 +1371,7 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
SetColorUpdated(vfb, 0);
char name[64];
snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, (Draw::FBColorDepth)vfb->colorDepth, name });
vfbs_.push_back(vfb);

Expand Down
50 changes: 27 additions & 23 deletions GPU/Common/TextureCacheCommon.cpp
Expand Up @@ -737,24 +737,21 @@ void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const c
entry->numFrames = 0;
}

void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg, FramebufferNotificationChannel channel) {
// Mask to ignore the Z memory mirrors if the address is in VRAM.
// These checks are mainly to reduce scanning all textures.

void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
const u32 mirrorMask = 0x00600000;
const u32 address = channel == NOTIFY_FB_COLOR ? framebuffer->fb_address : framebuffer->z_address;
const u32 addr = Memory::IsVRAMAddress(address) ? (address & ~mirrorMask) : address;
const u32 bpp = (framebuffer->format == GE_FORMAT_8888 && channel == NOTIFY_FB_COLOR) ? 4 : 2;
const u32 stride = channel == NOTIFY_FB_COLOR ? framebuffer->fb_stride : framebuffer->z_stride;
const u32 fb_addr = framebuffer->fb_address;

const u32 z_addr = framebuffer->z_address & ~mirrorMask; // Probably unnecessary.

const u32 fb_bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
const u32 z_bpp = 2; // No other format exists.
const u32 fb_stride = framebuffer->fb_stride;
const u32 z_stride = framebuffer->z_stride;

// NOTE: Some games like Burnout massively misdetect the height of some framebuffers, leading to a lot of unnecessary invalidations.
// Let's only actually get rid of textures that cover the very start of the framebuffer.
const u32 endAddr = addr + stride * std::min((int)framebuffer->height, 16) * bpp;

const u64 cacheKey = (u64)addr << 32;
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
const u64 cacheKeyEnd = (u64)endAddr << 32;
const u32 fb_endAddr = fb_addr + fb_stride * std::min((int)framebuffer->height, 16) * fb_bpp;
const u32 z_endAddr = z_addr + z_stride * std::min((int)framebuffer->height, 16) * z_bpp;

switch (msg) {
case NOTIFY_FB_CREATED:
Expand All @@ -765,16 +762,22 @@ void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, Fram

std::vector<AttachCandidate> candidates;

// TODO: Rework this to not try to "apply" all matches, only the best one.
if (channel == FramebufferNotificationChannel::NOTIFY_FB_COLOR) {
// Color - no need to look in the mirrors.
for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) {
it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
gpuStats.numTextureInvalidationsByFramebuffer++;
}
} else {
u64 cacheKey = (u64)fb_addr << 32;
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
u64 cacheKeyEnd = (u64)fb_endAddr << 32;

// Color - no need to look in the mirrors.
for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) {
it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
gpuStats.numTextureInvalidationsByFramebuffer++;
}

if (z_stride != 0) {
// Depth. Just look at the range, but in each mirror (0x04200000 and 0x04600000).
// Games don't use 0x04400000 as far as I know - it has no swizzle effect so kinda useless.
cacheKey = (u64)z_addr << 32;
cacheKeyEnd = (u64)z_endAddr << 32;
for (auto it = cache_.lower_bound(cacheKey | 0x200000), end = cache_.upper_bound(cacheKeyEnd | 0x200000); it != end; ++it) {
it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
gpuStats.numTextureInvalidationsByFramebuffer++;
Expand Down Expand Up @@ -1759,8 +1762,9 @@ void TextureCacheCommon::Invalidate(u32 addr, int size, GPUInvalidationType type
// This is an active signal from the game that something in the texture cache may have changed.
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
} else {
// Do a quick check to see if the current texture is in range.
// Do a quick check to see if the current texture could potentially be in range.
const u32 currentAddr = gstate.getTextureAddress(0);
// TODO: This can be made tighter.
if (addr_end >= currentAddr && addr < currentAddr + LARGEST_TEXTURE_SIZE) {
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
}
Expand Down
4 changes: 3 additions & 1 deletion GPU/Common/TextureCacheCommon.h
Expand Up @@ -248,7 +248,7 @@ class TextureCacheCommon {

// FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to,
// so that it can invalidate TexCacheEntries pointed at those addresses.
void NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg, FramebufferNotificationChannel channel);
void NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg);
void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt);

size_t NumLoadedTextures() const {
Expand Down Expand Up @@ -313,6 +313,8 @@ class TextureCacheCommon {
const u32 sizeInRAM = (textureBitsPerPixel[format] * bufw * h) / 8;
const u32 *checkp = (const u32 *)Memory::GetPointer(addr);

gpuStats.numTextureDataBytesHashed += sizeInRAM;

if (Memory::IsValidAddress(addr + sizeInRAM)) {
return DoQuickTexHash(checkp, sizeInRAM);
} else {
Expand Down
39 changes: 7 additions & 32 deletions GPU/D3D11/GPU_D3D11.cpp
Expand Up @@ -304,38 +304,13 @@ void GPU_D3D11::ExecuteOp(u32 op, u32 diff) {
}

void GPU_D3D11::GetStats(char *buffer, size_t bufsize) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
snprintf(buffer, bufsize - 1,
"DL processing time: %0.2f ms\n"
"Draw calls: %i, flushes %i, clears %i (cached: %d)\n"
"Num Tracked Vertex Arrays: %i\n"
"GPU cycles executed: %d (%f per vertex)\n"
"Commands per call level: %i %i %i %i\n"
"Vertices submitted: %i\n"
"Cached, Uncached Vertices Drawn: %i, %i\n"
"FBOs active: %i (evaluations: %d)\n"
"Textures active: %i, decoded: %i invalidated: %i\n"
"Readbacks: %d, uploads: %d\n"
"Vertex, Fragment shaders loaded: %i, %i\n",
gpuStats.msProcessingDisplayLists * 1000.0f,
gpuStats.numDrawCalls,
gpuStats.numFlushes,
gpuStats.numClears,
gpuStats.numCachedDrawCalls,
gpuStats.numTrackedVertexArrays,
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
vertexAverageCycles,
gpuStats.gpuCommandsAtCallLevel[0], gpuStats.gpuCommandsAtCallLevel[1], gpuStats.gpuCommandsAtCallLevel[2], gpuStats.gpuCommandsAtCallLevel[3],
gpuStats.numVertsSubmitted,
gpuStats.numCachedVertsDrawn,
gpuStats.numUncachedVertsDrawn,
(int)framebufferManagerD3D11_->NumVFBs(),
gpuStats.numFramebufferEvaluations,
(int)textureCacheD3D11_->NumLoadedTextures(),
gpuStats.numTexturesDecoded,
gpuStats.numTextureInvalidations,
gpuStats.numReadbacks,
gpuStats.numUploads,
size_t offset = FormatGPUStatsCommon(buffer, bufsize);
buffer += offset;
bufsize -= offset;
if ((int)bufsize < 0)
return;
snprintf(buffer, bufsize,
"Vertex, Fragment shaders loaded: %d, %d\n",
shaderManagerD3D11_->GetNumVertexShaders(),
shaderManagerD3D11_->GetNumFragmentShaders()
);
Expand Down
37 changes: 6 additions & 31 deletions GPU/Directx9/GPU_DX9.cpp
Expand Up @@ -344,38 +344,13 @@ void GPU_DX9::ExecuteOp(u32 op, u32 diff) {
}

void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
snprintf(buffer, bufsize - 1,
"DL processing time: %0.2f ms\n"
"Draw calls: %i, flushes %i, clears %i (cached: %d)\n"
"Num Tracked Vertex Arrays: %i\n"
"GPU cycles executed: %d (%f per vertex)\n"
"Commands per call level: %i %i %i %i\n"
"Vertices submitted: %i\n"
"Cached, Uncached Vertices Drawn: %i, %i\n"
"FBOs active: %i (evaluations: %d)\n"
"Textures active: %i, decoded: %i invalidated: %i\n"
"Readbacks: %d, uploads: %d\n"
size_t offset = FormatGPUStatsCommon(buffer, bufsize);
buffer += offset;
bufsize -= offset;
if ((int)bufsize < 0)
return;
snprintf(buffer, bufsize,
"Vertex, Fragment shaders loaded: %i, %i\n",
gpuStats.msProcessingDisplayLists * 1000.0f,
gpuStats.numDrawCalls,
gpuStats.numFlushes,
gpuStats.numClears,
gpuStats.numCachedDrawCalls,
gpuStats.numTrackedVertexArrays,
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
vertexAverageCycles,
gpuStats.gpuCommandsAtCallLevel[0], gpuStats.gpuCommandsAtCallLevel[1], gpuStats.gpuCommandsAtCallLevel[2], gpuStats.gpuCommandsAtCallLevel[3],
gpuStats.numVertsSubmitted,
gpuStats.numCachedVertsDrawn,
gpuStats.numUncachedVertsDrawn,
(int)framebufferManagerDX9_->NumVFBs(),
gpuStats.numFramebufferEvaluations,
(int)textureCacheDX9_->NumLoadedTextures(),
gpuStats.numTexturesDecoded,
gpuStats.numTextureInvalidations,
gpuStats.numReadbacks,
gpuStats.numUploads,
shaderManagerDX9_->GetNumVertexShaders(),
shaderManagerDX9_->GetNumFragmentShaders()
);
Expand Down
42 changes: 9 additions & 33 deletions GPU/GLES/GPU_GLES.cpp
Expand Up @@ -445,41 +445,17 @@ void GPU_GLES::ExecuteOp(u32 op, u32 diff) {
}

void GPU_GLES::GetStats(char *buffer, size_t bufsize) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
snprintf(buffer, bufsize - 1,
"DL processing time: %0.2f ms\n"
"Draw calls: %i, flushes %i, clears %i (cached: %d)\n"
"Num Tracked Vertex Arrays: %i\n"
"GPU cycles executed: %d (%f per vertex)\n"
"Commands per call level: %i %i %i %i\n"
"Vertices submitted: %i\n"
"Cached, Uncached Vertices Drawn: %i, %i\n"
"FBOs active: %i (evaluations: %d)\n"
"Textures active: %i, decoded: %i invalidated: %i\n"
"Readbacks: %d, uploads: %d\n"
"Vertex, Fragment, Programs loaded: %i, %i, %i\n",
gpuStats.msProcessingDisplayLists * 1000.0f,
gpuStats.numDrawCalls,
gpuStats.numFlushes,
gpuStats.numClears,
gpuStats.numCachedDrawCalls,
gpuStats.numTrackedVertexArrays,
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
vertexAverageCycles,
gpuStats.gpuCommandsAtCallLevel[0], gpuStats.gpuCommandsAtCallLevel[1], gpuStats.gpuCommandsAtCallLevel[2], gpuStats.gpuCommandsAtCallLevel[3],
gpuStats.numVertsSubmitted,
gpuStats.numCachedVertsDrawn,
gpuStats.numUncachedVertsDrawn,
(int)framebufferManagerGL_->NumVFBs(),
gpuStats.numFramebufferEvaluations,
(int)textureCacheGL_->NumLoadedTextures(),
gpuStats.numTexturesDecoded,
gpuStats.numTextureInvalidations,
gpuStats.numReadbacks,
gpuStats.numUploads,
size_t offset = FormatGPUStatsCommon(buffer, bufsize);
buffer += offset;
bufsize -= offset;
if ((int)bufsize < 0)
return;
snprintf(buffer, bufsize,
"Vertex, Fragment, Programs loaded: %d, %d, %d\n",
shaderManagerGL_->GetNumVertexShaders(),
shaderManagerGL_->GetNumFragmentShaders(),
shaderManagerGL_->GetNumPrograms());
shaderManagerGL_->GetNumPrograms()
);
}

void GPU_GLES::ClearCacheNextFrame() {
Expand Down
5 changes: 5 additions & 0 deletions GPU/GPU.h
Expand Up @@ -55,6 +55,7 @@ struct GPUStatistics {
// Never add a vtable :)
memset(this, 0, sizeof(*this));
}

void ResetFrame() {
numDrawCalls = 0;
numCachedDrawCalls = 0;
Expand All @@ -64,7 +65,9 @@ struct GPUStatistics {
numTrackedVertexArrays = 0;
numTextureInvalidations = 0;
numTextureInvalidationsByFramebuffer = 0;
numTexturesHashed = 0;
numTextureSwitches = 0;
numTextureDataBytesHashed = 0;
numShaderSwitches = 0;
numFlushes = 0;
numTexturesDecoded = 0;
Expand All @@ -88,6 +91,8 @@ struct GPUStatistics {
int numTrackedVertexArrays;
int numTextureInvalidations;
int numTextureInvalidationsByFramebuffer;
int numTexturesHashed;
int numTextureDataBytesHashed;
int numTextureSwitches;
int numShaderSwitches;
int numTexturesDecoded;
Expand Down
35 changes: 35 additions & 0 deletions GPU/GPUCommon.cpp
Expand Up @@ -2876,3 +2876,38 @@ bool GPUCommon::FramebufferReallyDirty() {
}
return true;
}

// Formats the statistics lines shared by all backends into `buffer`.
//
// buffer: destination for the NUL-terminated text.
// size:   capacity of `buffer` in bytes, including room for the terminator.
//
// Returns the number of characters actually stored in `buffer`, not counting
// the terminating NUL. The result is always < size (or 0 when size is 0), so a
// caller can safely do `buffer += n; size -= n;` and append more text.
size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
	// Avoid dividing by zero when nothing was submitted this frame.
	float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
	const int written = snprintf(buffer, size,
		"DL processing time: %0.2f ms\n"
		"Draw calls: %d, flushes %d, clears %d (cached: %d)\n"
		"Num Tracked Vertex Arrays: %d\n"
		"Commands per call level: %i %i %i %i\n"
		"Vertices: %d cached: %d uncached: %d\n"
		"FBOs active: %d (evaluations: %d)\n"
		"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
		"Readbacks: %d, uploads: %d\n"
		"GPU cycles executed: %d (%f per vertex)\n",
		gpuStats.msProcessingDisplayLists * 1000.0f,
		gpuStats.numDrawCalls,
		gpuStats.numFlushes,
		gpuStats.numClears,
		gpuStats.numCachedDrawCalls,
		gpuStats.numTrackedVertexArrays,
		gpuStats.gpuCommandsAtCallLevel[0], gpuStats.gpuCommandsAtCallLevel[1], gpuStats.gpuCommandsAtCallLevel[2], gpuStats.gpuCommandsAtCallLevel[3],
		gpuStats.numVertsSubmitted,
		gpuStats.numCachedVertsDrawn,
		gpuStats.numUncachedVertsDrawn,
		(int)framebufferManager_->NumVFBs(),
		gpuStats.numFramebufferEvaluations,
		(int)textureCache_->NumLoadedTextures(),
		gpuStats.numTexturesDecoded,
		gpuStats.numTextureInvalidations,
		gpuStats.numTextureDataBytesHashed / 1024,
		gpuStats.numReadbacks,
		gpuStats.numUploads,
		gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
		vertexAverageCycles
	);

	// snprintf returns a negative value on error, and the length that WOULD
	// have been written on truncation. Returning either unchanged as size_t
	// would hand callers an offset past the end of the buffer, so report only
	// what was really stored.
	if (written < 0 || size == 0)
		return 0;
	const size_t length = static_cast<size_t>(written);
	return length < size ? length : size - 1;
}
2 changes: 2 additions & 0 deletions GPU/GPUCommon.h
Expand Up @@ -303,6 +303,8 @@ class GPUCommon : public GPUInterface, public GPUDebugInterface {
}
}

size_t FormatGPUStatsCommon(char *buf, size_t size);

FramebufferManagerCommon *framebufferManager_ = nullptr;
TextureCacheCommon *textureCache_ = nullptr;
DrawEngineCommon *drawEngineCommon_ = nullptr;
Expand Down

0 comments on commit c093d85

Please sign in to comment.