From 05b6bbdc5658b0f1031ddd7b91c5d9c03b479347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 May 2023 14:38:11 +0200 Subject: [PATCH] Add a trivial profiling tool to the OpenGL backend - measure the time it takes to run a frame of commands. Accessed from the in-game dev menu just like the Vulkan frame profiler. With this we can easily see that actually submitting the GL commands is often the bottleneck on old devices like a Galaxy S3. --- Common/GPU/OpenGL/GLFrameData.h | 9 +++++++++ Common/GPU/OpenGL/GLRenderManager.cpp | 20 +++++++++++++++++++- Common/GPU/OpenGL/GLRenderManager.h | 4 +++- Common/GPU/OpenGL/thin3d_gl.cpp | 7 ++++++- GPU/Common/GPUDebugInterface.h | 3 +++ GPU/GLES/GPU_GLES.cpp | 5 +++++ GPU/GLES/GPU_GLES.h | 2 ++ GPU/Vulkan/DebugVisVulkan.cpp | 11 +++++++++-- GPU/Vulkan/GPU_Vulkan.h | 2 +- UI/DevScreens.cpp | 2 ++ UI/EmuScreen.cpp | 2 +- 11 files changed, 60 insertions(+), 7 deletions(-) diff --git a/Common/GPU/OpenGL/GLFrameData.h b/Common/GPU/OpenGL/GLFrameData.h index a50fbe7d55d4..571b3d8abe01 100644 --- a/Common/GPU/OpenGL/GLFrameData.h +++ b/Common/GPU/OpenGL/GLFrameData.h @@ -35,6 +35,13 @@ class GLDeleter { std::vector pushBuffers; }; +struct GLQueueProfileContext { + bool enabled; + double cpuStartTime; + double cpuEndTime; +}; + + // Per-frame data, round-robin so we can overlap submission with execution of the previous frame. 
struct GLFrameData { bool skipSwap = false; @@ -49,4 +56,6 @@ struct GLFrameData { GLDeleter deleter; GLDeleter deleter_prev; std::set activePushBuffers; + + GLQueueProfileContext profile; }; diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp index a28ab3e4cc3c..7d094a1d5188 100644 --- a/Common/GPU/OpenGL/GLRenderManager.cpp +++ b/Common/GPU/OpenGL/GLRenderManager.cpp @@ -8,6 +8,7 @@ #include "Common/Log.h" #include "Common/TimeUtil.h" #include "Common/MemoryUtil.h" +#include "Common/StringUtils.h" #include "Common/Math/math_util.h" #if 0 // def _DEBUG @@ -187,6 +188,14 @@ void GLRenderManager::StopThread() { } } +std::string GLRenderManager::GetGpuProfileString() const { + int curFrame = GetCurFrame(); + const GLQueueProfileContext &profile = frameData_[curFrame].profile; + + float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime); + return StringFromFormat("CPU time to run the list: %0.2f ms", cputime_ms); +} + void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) { _assert_(insideFrame_); #ifdef _DEBUG @@ -341,7 +350,7 @@ void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, i queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels); } -void GLRenderManager::BeginFrame() { +void GLRenderManager::BeginFrame(bool enableProfiling) { #ifdef _DEBUG curProgram_ = nullptr; #endif @@ -349,6 +358,7 @@ void GLRenderManager::BeginFrame() { int curFrame = GetCurFrame(); GLFrameData &frameData = frameData_[curFrame]; + frameData.profile.enabled = enableProfiling; { VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d()); std::unique_lock lock(frameData.fenceMutex); @@ -417,6 +427,10 @@ 
bool GLRenderManager::Run(GLRRenderThreadTask &task) { } } + if (frameData.profile.enabled) { + frameData.profile.cpuStartTime = time_now_d(); + } + if (IsVREnabled()) { int passes = GetVRPassesCount(); for (int i = 0; i < passes; i++) { @@ -428,6 +442,10 @@ bool GLRenderManager::Run(GLRRenderThreadTask &task) { queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false); } + if (frameData.profile.enabled) { + frameData.profile.cpuEndTime = time_now_d(); + } + if (!skipGLCalls_) { for (auto iter : frameData.activePushBuffers) { iter->MapDevice(bufferStrategy_); diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 0120204d15cd..a926dcd41fb3 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -409,8 +409,10 @@ class GLRenderManager { caps_ = caps; } + std::string GetGpuProfileString() const; + // Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again. - void BeginFrame(); + void BeginFrame(bool enableProfiling); // Can run on a different thread! 
void Finish(); diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index aae890ee182f..926bd78aead4 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -328,6 +328,9 @@ class OpenGLContext : public DrawContext { DrawContext::SetTargetSize(w, h); renderManager_.Resize(w, h); } + void SetDebugFlags(DebugFlags flags) override { + debugFlags_ = flags; + } const DeviceCaps &GetDeviceCaps() const override { return caps_; @@ -514,6 +517,8 @@ class OpenGLContext : public DrawContext { GLPushBuffer *push; }; FrameData frameData_[GLRenderManager::MAX_INFLIGHT_FRAMES]{}; + + DebugFlags debugFlags_ = DebugFlags::NONE; }; static constexpr int MakeIntelSimpleVer(int v1, int v2, int v3) { @@ -778,7 +783,7 @@ OpenGLContext::~OpenGLContext() { } void OpenGLContext::BeginFrame() { - renderManager_.BeginFrame(); + renderManager_.BeginFrame(debugFlags_ & DebugFlags::PROFILE_TIMESTAMPS); FrameData &frameData = frameData_[renderManager_.GetCurFrame()]; renderManager_.BeginPushBuffer(frameData.push); } diff --git a/GPU/Common/GPUDebugInterface.h b/GPU/Common/GPUDebugInterface.h index b67fca66f3c6..54ff31481e4d 100644 --- a/GPU/Common/GPUDebugInterface.h +++ b/GPU/Common/GPUDebugInterface.h @@ -257,6 +257,9 @@ class GPUDebugInterface { // cached framebuffers / textures / vertices? // get content of specific framebuffer / texture? // vertex / texture decoding? + + // Note: Wanted to name it GetProfileString but clashes with a Windows API. 
+ virtual std::string GetGpuProfileString() { return ""; } }; bool GPUDebugInitExpression(GPUDebugInterface *g, const char *str, PostfixExpression &exp); diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 7a0ddb7d8950..963a10018cdf 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -305,3 +305,8 @@ void GPU_GLES::GetStats(char *buffer, size_t bufsize) { shaderManagerGL_->GetNumPrograms() ); } + +std::string GPU_GLES::GetGpuProfileString() { + GLRenderManager *rm = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); + return rm->GetGpuProfileString(); +} diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h index 9e03285ee99b..5b56504eead4 100644 --- a/GPU/GLES/GPU_GLES.h +++ b/GPU/GLES/GPU_GLES.h @@ -51,6 +51,8 @@ class GPU_GLES : public GPUCommonHW { void BeginHostFrame() override; void EndHostFrame() override; + std::string GetGpuProfileString() override; + protected: void FinishDeferred() override; diff --git a/GPU/Vulkan/DebugVisVulkan.cpp b/GPU/Vulkan/DebugVisVulkan.cpp index 1c1cf1ff7a20..36b869f074f9 100644 --- a/GPU/Vulkan/DebugVisVulkan.cpp +++ b/GPU/Vulkan/DebugVisVulkan.cpp @@ -37,6 +37,8 @@ #include "GPU/Vulkan/VulkanUtil.h" #include "GPU/Vulkan/TextureCacheVulkan.h" +#include "Core/Config.h" + #undef DrawText bool comparePushBufferNames(const VulkanMemoryManager *a, const VulkanMemoryManager *b) { @@ -107,9 +109,14 @@ void DrawGPUProfilerVis(UIContext *ui, GPUInterface *gpu) { ui->Begin(); - GPU_Vulkan *gpuVulkan = static_cast(gpu); + float scale = 0.4f; + if (g_Config.iGPUBackend == (int)GPUBackend::OPENGL) { + // Don't have as much info, let's go bigger. 
+ scale = 0.7f; + } - std::string text = gpuVulkan->GetGpuProfileString(); + GPUCommon *gpuCommon = static_cast(gpu); + std::string text = gpuCommon->GetGpuProfileString(); - ui->SetFontScale(0.4f, 0.4f); + ui->SetFontScale(scale, scale); ui->DrawTextShadow(text.c_str(), x, y, 0xFFFFFFFF, FLAG_DYNAMIC_ASCII); diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index 78229a77f264..1c7f76756aa6 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -59,7 +59,7 @@ class GPU_Vulkan : public GPUCommonHW { return textureCacheVulkan_; } - std::string GetGpuProfileString(); + std::string GetGpuProfileString() override; protected: void FinishDeferred() override; diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index 59feb097ded3..61c7d1ec773a 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -102,6 +102,8 @@ void DevMenuScreen::CreatePopupContents(UI::ViewGroup *parent) { items->Add(new Choice(dev->T("Shader Viewer")))->OnClick.Handle(this, &DevMenuScreen::OnShaderView); if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN) { items->Add(new CheckBox(&g_Config.bShowAllocatorDebug, dev->T("Allocator Viewer"))); + } + if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN || g_Config.iGPUBackend == (int)GPUBackend::OPENGL) { items->Add(new CheckBox(&g_Config.bShowGpuProfile, dev->T("GPU Profile"))); } items->Add(new Choice(dev->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenuScreen::OnFreezeFrame); diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 5aa8e60fa58b..0710ce0ffa85 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -1616,7 +1616,7 @@ void EmuScreen::renderUI() { DrawAllocatorVis(ctx, gpu); } - if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN && g_Config.bShowGpuProfile) { + if ((g_Config.iGPUBackend == (int)GPUBackend::VULKAN || g_Config.iGPUBackend == (int)GPUBackend::OPENGL) && g_Config.bShowGpuProfile) { DrawGPUProfilerVis(ctx, gpu); }