Skip to content

Commit

Permalink
Merge pull request #16196 from hrydgard/improved-render-stats
Browse files Browse the repository at this point in the history
Improved stats in the Vulkan GPU profiler
  • Loading branch information
hrydgard committed Oct 10, 2022
2 parents 50285f6 + d4bfe92 commit d56bdcb
Show file tree
Hide file tree
Showing 21 changed files with 126 additions and 67 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,8 @@ add_library(Common STATIC
Common/Math/lin/vec3.h
Common/Math/math_util.cpp
Common/Math/math_util.h
Common/Math/Statistics.h
Common/Math/Statistics.cpp
Common/Net/HTTPClient.cpp
Common/Net/HTTPClient.h
Common/Net/HTTPHeaders.cpp
Expand Down
2 changes: 2 additions & 0 deletions Common/Common.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@
<ClInclude Include="Math\lin\matrix4x4.h" />
<ClInclude Include="Math\lin\vec3.h" />
<ClInclude Include="Math\math_util.h" />
<ClInclude Include="Math\Statistics.h" />
<ClInclude Include="Net\NetBuffer.h" />
<ClInclude Include="Net\HTTPClient.h" />
<ClInclude Include="Net\HTTPHeaders.h" />
Expand Down Expand Up @@ -886,6 +887,7 @@
<ClCompile Include="Math\lin\matrix4x4.cpp" />
<ClCompile Include="Math\lin\vec3.cpp" />
<ClCompile Include="Math\math_util.cpp" />
<ClCompile Include="Math\Statistics.cpp" />
<ClCompile Include="Net\NetBuffer.cpp" />
<ClCompile Include="Net\HTTPClient.cpp" />
<ClCompile Include="Net\HTTPHeaders.cpp" />
Expand Down
6 changes: 6 additions & 0 deletions Common/Common.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@
<ClInclude Include="GPU\Vulkan\VulkanFrameData.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
<ClInclude Include="Math\Statistics.h">
<Filter>Math</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ABI.cpp" />
Expand Down Expand Up @@ -797,6 +800,9 @@
<ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp">
<Filter>GPU\Vulkan</Filter>
</ClCompile>
<ClCompile Include="Math\Statistics.cpp">
<Filter>Math</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Crypto">
Expand Down
4 changes: 4 additions & 0 deletions Common/GPU/Vulkan/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,8 @@ void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
}
MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea);
steps[i]->render.renderPassType = MergeRPTypes(steps[i]->render.renderPassType, steps[j]->render.renderPassType);
steps[i]->render.numDraws += steps[j]->render.numDraws;
steps[i]->render.numReads += steps[j]->render.numReads;
// Cheaply skip the first step.
steps[j]->stepType = VKRStepType::RENDER_SKIP;
break;
Expand Down Expand Up @@ -936,6 +938,8 @@ void VulkanQueueRunner::ApplyRenderPassMerge(std::vector<VKRStep *> &steps) {
// So we don't consider it for other things, maybe doesn't matter.
src->dependencies.clear();
src->stepType = VKRStepType::RENDER_SKIP;
dst->render.numDraws += src->render.numDraws;
dst->render.numReads += src->render.numReads;
dst->render.pipelineFlags |= src->render.pipelineFlags;
dst->render.renderPassType = MergeRPTypes(dst->render.renderPassType, src->render.renderPassType);
};
Expand Down
30 changes: 26 additions & 4 deletions Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,12 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
img.tag = tag ? tag : "N/A";
}

VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
: vulkan_(vulkan), queueRunner_(vulkan),
initTimeMs_("initTimeMs"),
totalGPUTimeMs_("totalGPUTimeMs"),
renderCPUTimeMs_("renderCPUTimeMs")
{
inflightFramesAtStart_ = vulkan_->GetInflightFrames();

frameDataShared_.Init(vulkan);
Expand Down Expand Up @@ -579,14 +584,25 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
std::stringstream str;

char line[256];
snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
totalGPUTimeMs_.Update(((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
totalGPUTimeMs_.Format(line, sizeof(line));
str << line;
snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
renderCPUTimeMs_.Format(line, sizeof(line));
str << line;
for (int i = 0; i < numQueries - 1; i++) {
uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;
double milliseconds = (double)diff * timestampConversionFactor;
snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);

// Can't use SimpleStat for these very easily since these are dynamic per frame.
// Only the first one is static, the initCmd.
// Could try some hashtable tracking for the rest, later.
if (i == 0) {
initTimeMs_.Update(milliseconds);
initTimeMs_.Format(line, sizeof(line));
} else {
snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
}
str << line;
}
frameData.profile.profileSummary = str.str();
Expand Down Expand Up @@ -1344,3 +1360,9 @@ void VulkanRenderManager::FlushSync() {
frameData.syncDone = false;
}
}

// Clears the accumulated GPU profiler statistics by resetting each of the
// SimpleStat members (init time, total GPU time, render CPU time), so that
// their min/max/smoothed values start over from the next Update().
void VulkanRenderManager::ResetStats() {
initTimeMs_.Reset();
totalGPUTimeMs_.Reset();
renderCPUTimeMs_.Reset();
}
8 changes: 8 additions & 0 deletions Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <thread>
#include <queue>

#include "Common/Math/Statistics.h"
#include "Common/Thread/Promise.h"
#include "Common/System/Display.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
Expand Down Expand Up @@ -465,6 +466,8 @@ class VulkanRenderManager {
return outOfDateFrames_ > VulkanContext::MAX_INFLIGHT_FRAMES;
}

void ResetStats();

private:
void EndCurRenderStep();

Expand Down Expand Up @@ -535,4 +538,9 @@ class VulkanRenderManager {

// pipelines to check and possibly create at the end of the current render pass.
std::vector<VKRGraphicsPipeline *> pipelinesToCheck_;

// For nicer output in the little internal GPU profiler.
SimpleStat initTimeMs_;
SimpleStat totalGPUTimeMs_;
SimpleStat renderCPUTimeMs_;
};
4 changes: 4 additions & 0 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,10 @@ class VKContext : public DrawContext {

void FlushState() override {}

// Forwards the stats reset request to the Vulkan render manager.
void ResetStats() override {
renderManager_.ResetStats();
}

std::string GetInfoString(InfoField info) const override {
// TODO: Make these actually query the right information
switch (info) {
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,9 @@ class DrawContext {
// Flush state like scissors etc so the caller can do its own custom drawing.
virtual void FlushState() {}

// This is called when we launch a new game, so any collected internal stats in the backends don't carry over.
// The default implementation does nothing; a backend that collects stats can override it.
virtual void ResetStats() {}

virtual int GetCurrentStepId() const = 0;

protected:
Expand Down
11 changes: 11 additions & 0 deletions Common/Math/Statistics.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include <cstdio>

#include "Common/Math/Statistics.h"

// Writes a one-line, newline-terminated summary of this stat into buffer.
// buffer: destination for the text.
// sz:     size of buffer in bytes; passed straight to snprintf, which truncates to fit.
void SimpleStat::Format(char *buffer, size_t sz) {
	// min_ is still the INFINITY sentinel set by Reset() until Update() records a smaller value.
	const bool noSamplesYet = (min_ == INFINITY);
	if (noSamplesYet) {
		snprintf(buffer, sz, "%s: N/A\n", name_);
		return;
	}

	// Latest value, then the observed range and the smoothed average.
	snprintf(buffer, sz, "%s: %0.2f (%0.2f..%0.2f, avg %0.2f)\n", name_, value_, min_, max_, smoothed_);
}
42 changes: 42 additions & 0 deletions Common/Math/Statistics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once

#include <cmath>

// Minimal running statistic for convenience. Remembers the most recent sample along with
// the minimum, the maximum and an exponentially smoothed average of the samples seen
// since the last Reset().
struct SimpleStat {
	// name is kept as a raw pointer (it is not copied), so it must stay valid for as long
	// as this object is used.
	SimpleStat(const char *name) : name_(name) { Reset(); }

	// Records a new sample and folds it into the min/max/smoothed values.
	void Update(double value) {
		// While min_ still holds the sentinel from Reset(), nothing has been recorded yet,
		// so the average is seeded with the sample itself rather than blended.
		const bool firstSample = (min_ == INFINITY);
		// TODO: Make factor adjustable?
		smoothed_ = firstSample ? value : (0.99 * smoothed_ + 0.01 * value);
		value_ = value;

		if (min_ > value) {
			min_ = value;
		}
		if (max_ < value) {
			max_ = value;
		}
	}

	// Returns the stat to its "no samples" state.
	void Reset() {
		// Sentinels: any finite sample will compare below min_ and above max_.
		min_ = INFINITY;
		max_ = -INFINITY;
		value_ = 0.0;
		smoothed_ = 0.0;  // doesn't really need init, the first Update() overwrites it
	}

	// Writes a one-line summary into buffer (sz bytes available). Defined in Statistics.cpp.
	void Format(char *buffer, size_t sz);

private:
	const char *name_;

	// All of these are initialized in Reset().
	double value_;     // most recent sample
	double min_;       // smallest sample since Reset(), INFINITY if none
	double max_;       // largest sample since Reset(), -INFINITY if none
	double smoothed_;  // exponentially smoothed average
};
43 changes: 2 additions & 41 deletions Common/Math/math_util.cpp
Original file line number Diff line number Diff line change
@@ -1,44 +1,5 @@
#include "Common/Math/math_util.h"
#include <stdlib.h>

// QNX can only use RunFast mode and it is already the default.
#if defined(__ARM_ARCH_7A__)
// Enables 'RunFast' VFP mode.
void EnableFZ() {
int x;
asm(
"fmrx %[result],FPSCR \r\n"
"orr %[result],%[result],#16777216 \r\n"
"fmxr FPSCR,%[result]"
:[result] "=r" (x) : :
);
//printf("ARM FPSCR: %08x\n",x);
}
#include <cstdlib>

// New fastmode code from: http://pandorawiki.org/Floating_Point_Optimization
// These settings turbocharge the slow VFP unit on Cortex-A8 based chips by setting
// restrictions that permit running VFP instructions on the NEON unit.
// Denormal flush-to-zero, for example.
void FPU_SetFastMode() {
static const unsigned int x = 0x04086060;
static const unsigned int y = 0x03000000;
int r;
asm volatile (
"fmrx %0, fpscr \n\t" //r0 = FPSCR
"and %0, %0, %1 \n\t" //r0 = r0 & 0x04086060
"orr %0, %0, %2 \n\t" //r0 = r0 | 0x03000000
"fmxr fpscr, %0 \n\t" //FPSCR = r0
: "=r"(r)
: "r"(x), "r"(y)
);
}

#else

void EnableFZ() {
// TODO
}

void FPU_SetFastMode() {}

#endif
// Could delete this file, but might find use again.
9 changes: 0 additions & 9 deletions Common/Math/math_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,3 @@ inline uint16_t ShrinkToHalf(float full) {
FP16 fp = float_to_half_fast3(fp32);
return fp.u;
}

// FPU control.
void EnableFZ();

// Enable both FZ and Default-NaN. Is documented to flip some ARM implementation into a "run-fast" mode
// where they can schedule VFP instructions on the NEON unit (these implementations have
// very slow VFP units).
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
void FPU_SetFastMode();
2 changes: 1 addition & 1 deletion GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,7 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer
}

if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) {
BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "Blit_CopyFramebufferForColorTexture");
BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "CopyFBForColorTexture");
}
}

Expand Down
1 change: 0 additions & 1 deletion Qt/QtMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,6 @@ static int mainInternal(QApplication &a) {
emugl->resize(pixel_xres, pixel_yres);
emugl->showFullScreen();
#endif
EnableFZ();
// Disable screensaver
#if defined(QT_HAS_SYSTEMINFO)
QScreenSaver ssObject(emugl);
Expand Down
1 change: 0 additions & 1 deletion SDL/SDLMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,6 @@ int main(int argc, char *argv[]) {
} else {
joystick = nullptr;
}
EnableFZ();

int framecount = 0;
bool mouseDown = false;
Expand Down
2 changes: 2 additions & 0 deletions UI/EmuScreen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,8 @@ void EmuScreen::bootGame(const Path &filename) {

loadingViewColor_->Divert(0xFFFFFFFF, 0.75f);
loadingViewVisible_->Divert(UI::V_VISIBLE, 0.75f);

screenManager()->getDrawContext()->ResetStats();
}

void EmuScreen::bootComplete() {
Expand Down
4 changes: 3 additions & 1 deletion UWP/CommonUWP/CommonUWP.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@
<ClInclude Include="..\..\Common\BitSet.h" />
<ClInclude Include="..\..\Common\Buffer.h" />
<ClInclude Include="..\..\Common\File\AndroidStorage.h" />
<ClInclude Include="..\..\Common\Math\Statistics.h" />
<ClInclude Include="..\..\Common\Net\NetBuffer.h" />
<ClInclude Include="..\..\Common\Data\Collections\ConstMap.h" />
<ClInclude Include="..\..\Common\Data\Collections\FixedSizeQueue.h" />
Expand Down Expand Up @@ -519,6 +520,7 @@
<ClCompile Include="..\..\Common\ArmEmitter.cpp" />
<ClCompile Include="..\..\Common\Buffer.cpp" />
<ClCompile Include="..\..\Common\File\AndroidStorage.cpp" />
<ClCompile Include="..\..\Common\Math\Statistics.cpp" />
<ClCompile Include="..\..\Common\Net\NetBuffer.cpp" />
<ClCompile Include="..\..\Common\Data\Color\RGBAUtil.cpp" />
<ClCompile Include="..\..\Common\Data\Convert\SmallDataConvert.cpp" />
Expand Down Expand Up @@ -639,4 +641,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>
14 changes: 8 additions & 6 deletions UWP/CommonUWP/CommonUWP.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,9 @@
<ClCompile Include="..\..\Common\File\AndroidStorage.cpp">
<Filter>File</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\Math\Statistics.cpp">
<Filter>Math</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="targetver.h" />
Expand Down Expand Up @@ -688,11 +691,9 @@
<ClInclude Include="..\..\Common\File\AndroidStorage.h">
<Filter>File</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="..\..\ext\libpng17\CMakeLists.txt">
<Filter>ext\libpng17</Filter>
</Text>
<ClInclude Include="..\..\Common\Math\Statistics.h">
<Filter>Math</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\..\Common\Math\fast\fast_matrix_neon.S">
Expand All @@ -701,5 +702,6 @@
<None Include="..\..\Common\Math\lin\matrix_neon.s">
<Filter>Math\lin</Filter>
</None>
<None Include="..\..\ext\libpng17\CMakeLists.txt" />
</ItemGroup>
</Project>
</Project>
1 change: 1 addition & 0 deletions android/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ EXEC_AND_LIB_FILES := \
$(SRC)/Common/Input/InputState.cpp \
$(SRC)/Common/Math/fast/fast_matrix.c \
$(SRC)/Common/Math/math_util.cpp \
$(SRC)/Common/Math/Statistics.cpp \
$(SRC)/Common/Math/curves.cpp \
$(SRC)/Common/Math/expression_parser.cpp \
$(SRC)/Common/Math/lin/vec3.cpp.arm \
Expand Down
3 changes: 0 additions & 3 deletions ios/ViewController.mm
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,3 @@ void bindDefaultFBO()
{
[sharedViewController bindDefaultFBO];
}

void EnableFZ(){};
void DisableFZ(){};
1 change: 1 addition & 0 deletions libretro/Makefile.common
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ SOURCES_CXX += \
$(COMMONDIR)/Math/curves.cpp \
$(COMMONDIR)/Math/expression_parser.cpp \
$(COMMONDIR)/Math/math_util.cpp \
$(COMMONDIR)/Math/Statistics.cpp \
$(COMMONDIR)/Math/lin/vec3.cpp \
$(COMMONDIR)/Math/lin/matrix4x4.cpp \
$(COMMONDIR)/Net/HTTPClient.cpp \
Expand Down

0 comments on commit d56bdcb

Please sign in to comment.