Permalink
Browse files

More vulkan microoptimizations. Add more profiler scopes.

  • Loading branch information...
hrydgard committed Aug 18, 2017
1 parent ed776d8 commit b9b2656e93133ed781e082005c6cc5311c756c13
@@ -129,8 +129,6 @@ namespace MIPSComp {
}
}
DISABLE;
u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
std::vector<FixupBranch> skips;
View
@@ -3,6 +3,8 @@
#include <mutex>
#include "base/timeutil.h"
#include "profiler/profiler.h"
#include "Common/ColorConv.h"
#include "Core/Reporting.h"
#include "GPU/GeDisasm.h"
@@ -1211,6 +1213,7 @@ void GPUCommon::Execute_BJump(u32 op, u32 diff) {
}
void GPUCommon::Execute_Call(u32 op, u32 diff) {
PROFILE_THIS_SCOPE("gpu_call");
easy_guard guard(listLock);
// Saint Seiya needs correct support for relative calls.
@@ -1559,6 +1562,7 @@ void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) {
}
void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) {
PROFILE_THIS_SCOPE("block");
Flush();
// and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa.
// Can we skip this on SkipDraw?
@@ -1574,6 +1578,7 @@ void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
// We must record the individual data commands while debugRecording_.
bool fastLoad = !debugRecording_;
// Stalling in the middle of a matrix would be stupid, I doubt this check is necessary.

This comment has been minimized.

Show comment
Hide comment
@unknownbrackets

unknownbrackets Aug 20, 2017

Collaborator

Maybe, but it did happen with bone matrix loads. The case was a partially overwritten matrix load: the stall address pointed at matrix data, but that data would be overwritten with something else after the stall.

Example, pass 1:

NOP
MATRIX LOAD 1
MATRIX LOAD 2
MATRIX LOAD 3
(stall) garbage data

Pass 2:

MATRIX LOAD 1
MATRIX LOAD 2
MATRIX LOAD 3
(stall) MATRIX LOAD 3 (but actually garbage)

That's why.

-[Unknown]

@unknownbrackets

unknownbrackets Aug 20, 2017

Collaborator

Maybe, but it did happen with bone matrix loads. The case was a partially overwritten matrix load: the stall address pointed at matrix data, but that data would be overwritten with something else after the stall.

Example, pass 1:

NOP
MATRIX LOAD 1
MATRIX LOAD 2
MATRIX LOAD 3
(stall) garbage data

Pass 2:

MATRIX LOAD 1
MATRIX LOAD 2
MATRIX LOAD 3
(stall) MATRIX LOAD 3 (but actually garbage)

That's why.

-[Unknown]

This comment has been minimized.

Show comment
Hide comment
@hrydgard

hrydgard Aug 20, 2017

Owner

Oh, I see. Thanks, makes sense.

@hrydgard

hrydgard Aug 20, 2017

Owner

Oh, I see. Thanks, makes sense.

if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
fastLoad = false;
}
@@ -1786,7 +1791,7 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
}
const int numPlusCount = (op & 0x7F) + i;
for (int num = op & 0x7F; num < numPlusCount; num += 12) {
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12));
}
} else {
@@ -1798,7 +1803,7 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
}
const int numPlusCount = (op & 0x7F) + i;
for (int num = op & 0x7F; num < numPlusCount; num += 12) {
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
}
}
@@ -20,6 +20,7 @@
#include "base/logging.h"
#include "base/timeutil.h"
#include "math/dataconv.h"
#include "profiler/profiler.h"
#include "Common/MemoryUtil.h"
#include "Core/MemMap.h"
@@ -51,7 +52,7 @@
enum {
VERTEX_CACHE_SIZE = 4096 * 1024
VERTEX_CACHE_SIZE = 8192 * 1024
};
#define VERTEXCACHE_DECIMATION_INTERVAL 17
@@ -275,8 +276,6 @@ void DrawEngineVulkan::BeginFrame() {
lastPipeline_ = nullptr;
FrameData *frame = &frame_[curFrame_];
vkResetDescriptorPool(vulkan_->GetDevice(), frame->descPool, 0);
frame->descSets.clear();
// First reset all buffers, then begin. This is so that Reset can free memory and Begin can allocate it,
// if growing the buffer is needed. Doing it this way will reduce fragmentation if more than one buffer
@@ -323,6 +322,8 @@ void DrawEngineVulkan::BeginFrame() {
vertexCache_->BeginNoReset();
if (--decimationCounter_ <= 0) {
vkResetDescriptorPool(vulkan_->GetDevice(), frame->descPool, 0);
frame->descSets.clear();
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
@@ -498,7 +499,7 @@ void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset,
}
VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone) {
VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone) {
DescriptorSetKey key;
key.imageView_ = imageView;
key.sampler_ = sampler;
@@ -511,7 +512,7 @@ VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSamp
assert(bone != VK_NULL_HANDLE);
FrameData *frame = &frame_[curFrame_];
if (!(gstate_c.bezier || gstate_c.spline)) { // Has no cache when HW tessellation.
if (!gstate_c.bezier && !gstate_c.spline) { // Has no cache when HW tessellation.
auto iter = frame->descSets.find(key);
if (iter != frame->descSets.end()) {
return iter->second;
@@ -629,8 +630,9 @@ void MarkUnreliable(VertexArrayInfoVulkan *vai) {
// For now we just leave it in the pushbuffer.
}
// The inline wrapper in the header checks for numDrawCalls == 0d
// The inline wrapper in the header checks for numDrawCalls == 0
void DrawEngineVulkan::DoFlush() {
PROFILE_THIS_SCOPE("Flush");
gpuStats.numFlushes++;
// TODO: Should be enough to update this once per frame?
gpuStats.numTrackedVertexArrays = (int)vai_.size();
@@ -640,7 +642,7 @@ void DrawEngineVulkan::DoFlush() {
lastPipeline_ = nullptr;
lastCmd_ = cmd;
// Since we have a new cmdbuf, dirty our dynamic state so it gets re-set.
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE|DIRTY_DEPTHSTENCIL_STATE);
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE|DIRTY_DEPTHSTENCIL_STATE|DIRTY_BLEND_STATE);
}
VkRenderPass rp = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::CURRENT_RENDERPASS);
@@ -671,8 +673,6 @@ void DrawEngineVulkan::DoFlush() {
uint32_t ibOffset = 0;
uint32_t vbOffset = 0;
VkRenderPass renderPass = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::COMPATIBLE_RENDERPASS);
if (useHWTransform) {
// We don't detect clears in this path, so here we can switch framebuffers if necessary.
@@ -690,6 +690,7 @@ void DrawEngineVulkan::DoFlush() {
}
if (useCache) {
PROFILE_THIS_SCOPE("vcache");
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
auto iter = vai_.find(id);
VertexArrayInfoVulkan *vai;
@@ -722,6 +723,7 @@ void DrawEngineVulkan::DoFlush() {
// But if we get this far it's likely to be worth uploading the data.
case VertexArrayInfoVulkan::VAI_HASHING:
{
PROFILE_THIS_SCOPE("vcachehash");
vai->numDraws++;
if (vai->lastFrame != gpuStats.numFlips) {
vai->numFrames++;
@@ -856,6 +858,7 @@ void DrawEngineVulkan::DoFlush() {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
PROFILE_THIS_SCOPE("updatestate");
if (textureNeedsApply) {
textureCache_->ApplyTexture();
textureCache_->GetVulkanHandles(imageView, sampler);
@@ -870,6 +873,7 @@ void DrawEngineVulkan::DoFlush() {
if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
}
VkRenderPass renderPass = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::COMPATIBLE_RENDERPASS);
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey_, dec_, vshader, fshader, true);
if (!pipeline) {
// Already logged, let's bail out.
@@ -885,10 +889,12 @@ void DrawEngineVulkan::DoFlush() {
lastPrim_ = prim;
dirtyUniforms_ |= shaderManager_->UpdateUniforms();
UpdateUBOs(frame);
VkDescriptorSet ds = GetDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf);
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf);
{
PROFILE_THIS_SCOPE("vkdraw");
const uint32_t dynamicUBOOffsets[3] = {
baseUBOOffset, lightUBOOffset, boneUBOOffset,
@@ -911,7 +917,9 @@ void DrawEngineVulkan::DoFlush() {
vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets);
vkCmdDraw(cmd, vertexCount, 1, 0, 0);
}
}
} else {
PROFILE_THIS_SCOPE("soft");
// Decode to "decoded"
DecodeVerts(nullptr, nullptr, nullptr);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
@@ -928,7 +936,6 @@ void DrawEngineVulkan::DoFlush() {
prim = GE_PRIM_TRIANGLES;
VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
lastPrim_ = prim;
int numTrans = 0;
bool drawIndexed = false;
u16 *inds = decIndex;
@@ -963,26 +970,37 @@ void DrawEngineVulkan::DoFlush() {
sampler = nullSampler_;
}
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
ApplyDrawStateLate(cmd, result.setStencil, result.stencilValue);
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform);
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey_, dec_, vshader, fshader, false);
if (!pipeline) {
// Already logged, let's bail out.
return;
if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE) || prim != lastPrim_) {
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform);
if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
}
VkRenderPass renderPass = (VkRenderPass)draw_->GetNativeObject(Draw::NativeObject::COMPATIBLE_RENDERPASS);
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey_, dec_, vshader, fshader, false);
if (!pipeline) {
// Already logged, let's bail out.
return;
}
if (pipeline != lastPipeline_) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline); // TODO: Avoid if same as last draw.
lastPipeline_ = pipeline;
}
ApplyDrawStateLate(cmd, false, 0);
gstate_c.Clean(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
}
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline); // TODO: Avoid if same as last draw.
lastPrim_ = prim;
dirtyUniforms_ |= shaderManager_->UpdateUniforms();
// Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered
UpdateUBOs(frame);
VkDescriptorSet ds = GetDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf);
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf);
const uint32_t dynamicUBOOffsets[3] = {
baseUBOOffset, lightUBOOffset, boneUBOOffset,
};
PROFILE_THIS_SCOPE("vkdrawsoft");
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 0, 1, &ds, 3, dynamicUBOOffsets);
if (drawIndexed) {
@@ -183,7 +183,7 @@ class DrawEngineVulkan : public DrawEngineCommon {
void DoFlush();
void UpdateUBOs(FrameData *frame);
VkDescriptorSet GetDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone);
VkDescriptorSet GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone);
VulkanContext *vulkan_;
Draw::DrawContext *draw_;
@@ -193,6 +193,7 @@ class DrawEngineVulkan : public DrawEngineCommon {
VkPipelineLayout pipelineLayout_;
VkCommandBuffer lastCmd_ = VK_NULL_HANDLE;
VulkanPipeline *lastPipeline_;
VkDescriptorSet lastDs_ = VK_NULL_HANDLE;
std::unordered_map<u32, VertexArrayInfoVulkan *> vai_;
VulkanPushBuffer *vertexCache_;
@@ -459,6 +459,8 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
// This drives all drawing. All other state we just buffer up, then we apply it only
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
PROFILE_THIS_SCOPE("execprim");
u32 data = op & 0xFFFFFF;
u32 count = data & 0xFFFF;
if (count == 0)
@@ -1,5 +1,7 @@
#include <cstring>
#include "profiler/profiler.h"
#include "Common/Log.h"
#include "Common/StringUtils.h"
#include "Common/Vulkan/VulkanContext.h"
@@ -307,7 +309,9 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layo
if (iter != pipelines_.end()) {
return iter->second;
}
PROFILE_THIS_SCOPE("pipelinebuild");
VulkanPipeline *pipeline = CreateVulkanPipeline(
vulkan_->GetDevice(), pipelineCache_, layout, renderPass,
rasterKey, vtxDec, vs, fs, useHwTransform);
@@ -25,6 +25,7 @@
#include "math/lin/matrix4x4.h"
#include "math/math_util.h"
#include "math/dataconv.h"
#include "profiler/profiler.h"
#include "util/text/utf8.h"
#include "Common/Vulkan/VulkanContext.h"
#include "Common/Vulkan/VulkanMemory.h"
@@ -42,6 +43,7 @@
VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, ShaderID id, const char *code, bool useHWTransform)
: vulkan_(vulkan), id_(id), failed_(false), useHWTransform_(useHWTransform), module_(0) {
PROFILE_THIS_SCOPE("shadercomp");
source_ = code;
std::string errorMessage;
@@ -99,6 +101,7 @@ std::string VulkanFragmentShader::GetShaderString(DebugShaderStringType type) co
VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting)
: vulkan_(vulkan), id_(id), failed_(false), useHWTransform_(useHWTransform), module_(VK_NULL_HANDLE), usesLighting_(usesLighting) {
PROFILE_THIS_SCOPE("shadercomp");
source_ = code;
std::string errorMessage;
std::vector<uint32_t> spirv;
@@ -168,19 +171,6 @@ ShaderManagerVulkan::~ShaderManagerVulkan() {
delete[] codeBuffer_;
}
// Copies the base uniform block (ub_base) into `dest` through PushAligned,
// using uboAlignment_ as the alignment, and forwards `buf` unchanged.
// Returns PushAligned's result as a uint32_t (presumably the offset of the
// pushed data inside the buffer - confirm against VulkanPushBuffer).
uint32_t ShaderManagerVulkan::PushBaseBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
	const uint32_t baseResult = dest->PushAligned(&ub_base, sizeof(ub_base), uboAlignment_, buf);
	return baseResult;
}
// Copies the light uniform block (ub_lights) into `dest` through PushAligned,
// using uboAlignment_ as the alignment, and forwards `buf` unchanged.
// Returns PushAligned's result as a uint32_t (presumably the offset of the
// pushed data inside the buffer - confirm against VulkanPushBuffer).
uint32_t ShaderManagerVulkan::PushLightBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
	const uint32_t lightResult = dest->PushAligned(&ub_lights, sizeof(ub_lights), uboAlignment_, buf);
	return lightResult;
}
// Copies the bone uniform block (ub_bones) into `dest` through PushAligned,
// using uboAlignment_ as the alignment, and forwards `buf` unchanged.
// Returns PushAligned's result as a uint32_t (presumably the offset of the
// pushed data inside the buffer - confirm against VulkanPushBuffer).
// TODO: Only push half the bone buffer if we only have four bones.
uint32_t ShaderManagerVulkan::PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
	const uint32_t boneResult = dest->PushAligned(&ub_bones, sizeof(ub_bones), uboAlignment_, buf);
	return boneResult;
}
void ShaderManagerVulkan::DeviceRestore(VulkanContext *vulkan) {
vulkan_ = vulkan;
uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment;
@@ -21,6 +21,7 @@
#include "base/basictypes.h"
#include "Globals.h"
#include "Common/Vulkan/VulkanMemory.h"
#include "GPU/Common/ShaderCommon.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Vulkan/VertexShaderGeneratorVulkan.h"
@@ -113,9 +114,16 @@ class ShaderManagerVulkan : public ShaderManagerCommon {
bool IsLightDirty() { return true; }
bool IsBoneDirty() { return true; }
uint32_t PushBaseBuffer(VulkanPushBuffer *dest, VkBuffer *buf);
uint32_t PushLightBuffer(VulkanPushBuffer *dest, VkBuffer *buf);
uint32_t PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf);
// Pushes the whole ub_base block into `dest` via PushAligned, aligned to
// uboAlignment_; `buf` is handed straight through to PushAligned.
// The return value is whatever PushAligned yields, converted to uint32_t
// (presumably the buffer offset of the pushed block - TODO confirm).
uint32_t PushBaseBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
	const uint32_t pushed = dest->PushAligned(&ub_base, sizeof(ub_base), uboAlignment_, buf);
	return pushed;
}
// Pushes the whole ub_lights block into `dest` via PushAligned, aligned to
// uboAlignment_; `buf` is handed straight through to PushAligned.
// The return value is whatever PushAligned yields, converted to uint32_t
// (presumably the buffer offset of the pushed block - TODO confirm).
uint32_t PushLightBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
	const uint32_t pushed = dest->PushAligned(&ub_lights, sizeof(ub_lights), uboAlignment_, buf);
	return pushed;
}
// Pushes the whole ub_bones block into `dest` via PushAligned, aligned to
// uboAlignment_; `buf` is handed straight through to PushAligned.
// The return value is whatever PushAligned yields, converted to uint32_t
// (presumably the buffer offset of the pushed block - TODO confirm).
// TODO: Only push half the bone buffer if we only have four bones.
uint32_t PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
	const uint32_t pushed = dest->PushAligned(&ub_bones, sizeof(ub_bones), uboAlignment_, buf);
	return pushed;
}
private:
void Clear();
Oops, something went wrong.

0 comments on commit b9b2656

Please sign in to comment.