Skip to content

Commit

Permalink
Merge pull request #11286 from K0bin/vk-query-fix
Browse files: browse the repository at this point in the history
VideoBackends: Query fixes and cleanups
  • Loading branch information
JMC47 committed Dec 19, 2022
2 parents 9f4d999 + 35a6d16 commit fb8aa97
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 76 deletions.
18 changes: 9 additions & 9 deletions Source/Core/VideoBackends/D3D/D3DPerfQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ PerfQuery::PerfQuery() : m_query_read_pos()

PerfQuery::~PerfQuery() = default;

void PerfQuery::EnableQuery(PerfQueryGroup type)
void PerfQuery::EnableQuery(PerfQueryGroup group)
{
u32 query_count = m_query_count.load(std::memory_order_relaxed);

Expand All @@ -44,21 +44,21 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
}

// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

D3D::context->Begin(entry.query.Get());
entry.query_type = type;
entry.query_group = group;

m_query_count.fetch_add(1, std::memory_order_relaxed);
}
}

void PerfQuery::DisableQuery(PerfQueryGroup type)
void PerfQuery::DisableQuery(PerfQueryGroup group)
{
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count.load(std::memory_order_relaxed) +
m_query_buffer.size() - 1) %
Expand Down Expand Up @@ -116,8 +116,8 @@ void PerfQuery::FlushOne()
// hardware behavior when drawing triangles.
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
g_renderer->GetTargetHeight();
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);
m_results[entry.query_group].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);

m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
m_query_count.fetch_sub(1, std::memory_order_relaxed);
Expand Down Expand Up @@ -145,8 +145,8 @@ void PerfQuery::WeakFlush()
// NOTE: Reported pixel metrics should be referenced to native resolution
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
g_renderer->GetTargetHeight();
m_results[entry.query_type].store(static_cast<u32>(native_res_result),
std::memory_order_relaxed);
m_results[entry.query_group].store(static_cast<u32>(native_res_result),
std::memory_order_relaxed);

m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
m_query_count.fetch_sub(1, std::memory_order_relaxed);
Expand Down
6 changes: 3 additions & 3 deletions Source/Core/VideoBackends/D3D/D3DPerfQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class PerfQuery : public PerfQueryBase
PerfQuery();
~PerfQuery();

void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
Expand All @@ -26,7 +26,7 @@ class PerfQuery : public PerfQueryBase
struct ActiveQuery
{
ComPtr<ID3D11Query> query;
PerfQueryGroup query_type{};
PerfQueryGroup query_group{};
};

void WeakFlush();
Expand Down
13 changes: 7 additions & 6 deletions Source/Core/VideoBackends/D3D12/D3D12PerfQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ bool PerfQuery::Initialize()
return true;
}

void PerfQuery::EnableQuery(PerfQueryGroup type)
void PerfQuery::EnableQuery(PerfQueryGroup group)
{
// Block if there are no free slots.
// Otherwise, try to keep half of them available.
Expand All @@ -66,20 +66,21 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
// this assumes that the caller has bound all required state prior to enabling the query.
Renderer::GetInstance()->ApplyState();

if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
ASSERT(!entry.has_value && !entry.resolved);
entry.has_value = true;
entry.query_group = group;

g_dx_context->GetCommandList()->BeginQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
m_query_next_pos);
}
}

void PerfQuery::DisableQuery(PerfQueryGroup type)
void PerfQuery::DisableQuery(PerfQueryGroup group)
{
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
m_query_next_pos);
Expand Down Expand Up @@ -245,8 +246,8 @@ void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH /
g_renderer->GetTargetWidth() * EFB_HEIGHT /
g_renderer->GetTargetHeight();
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);
m_results[entry.query_group].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);
}

constexpr D3D12_RANGE write_range = {0, 0};
Expand Down
6 changes: 3 additions & 3 deletions Source/Core/VideoBackends/D3D12/D3D12PerfQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class PerfQuery final : public PerfQueryBase
bool Initialize();
void ResolveQueries();

void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
Expand All @@ -31,7 +31,7 @@ class PerfQuery final : public PerfQueryBase
struct ActiveQuery
{
u64 fence_value;
PerfQueryType query_type;
PerfQueryGroup query_group;
bool has_value;
bool resolved;
};
Expand Down
34 changes: 17 additions & 17 deletions Source/Core/VideoBackends/OGL/OGLPerfQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ PerfQuery::PerfQuery() : m_query_read_pos()
ResetQuery();
}

void PerfQuery::EnableQuery(PerfQueryGroup type)
void PerfQuery::EnableQuery(PerfQueryGroup group)
{
m_query->EnableQuery(type);
m_query->EnableQuery(group);
}

void PerfQuery::DisableQuery(PerfQueryGroup type)
void PerfQuery::DisableQuery(PerfQueryGroup group)
{
m_query->DisableQuery(type);
m_query->DisableQuery(group);
}

bool PerfQuery::IsFlushed() const
Expand Down Expand Up @@ -96,7 +96,7 @@ PerfQueryGL::~PerfQueryGL()
glDeleteQueries(1, &query.query_id);
}

void PerfQueryGL::EnableQuery(PerfQueryGroup type)
void PerfQueryGL::EnableQuery(PerfQueryGroup group)
{
u32 query_count = m_query_count.load(std::memory_order_relaxed);

Expand All @@ -115,20 +115,20 @@ void PerfQueryGL::EnableQuery(PerfQueryGroup type)
}

// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

glBeginQuery(m_query_type, entry.query_id);
entry.query_type = type;
entry.query_group = group;

m_query_count.fetch_add(1, std::memory_order_relaxed);
}
}
void PerfQueryGL::DisableQuery(PerfQueryGroup type)
void PerfQueryGL::DisableQuery(PerfQueryGroup group)
{
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
glEndQuery(m_query_type);
}
Expand Down Expand Up @@ -171,7 +171,7 @@ void PerfQueryGL::FlushOne()
if (g_ActiveConfig.iMultisamples > 1)
result /= g_ActiveConfig.iMultisamples;

m_results[entry.query_type].fetch_add(result, std::memory_order_relaxed);
m_results[entry.query_group].fetch_add(result, std::memory_order_relaxed);

m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
m_query_count.fetch_sub(1, std::memory_order_relaxed);
Expand All @@ -196,7 +196,7 @@ PerfQueryGLESNV::~PerfQueryGLESNV()
glDeleteOcclusionQueriesNV(1, &query.query_id);
}

void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
void PerfQueryGLESNV::EnableQuery(PerfQueryGroup group)
{
u32 query_count = m_query_count.load(std::memory_order_relaxed);

Expand All @@ -215,20 +215,20 @@ void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
}

// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

glBeginOcclusionQueryNV(entry.query_id);
entry.query_type = type;
entry.query_group = group;

m_query_count.fetch_add(1, std::memory_order_relaxed);
}
}
void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
void PerfQueryGLESNV::DisableQuery(PerfQueryGroup group)
{
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
glEndOcclusionQueryNV();
}
Expand Down Expand Up @@ -266,8 +266,8 @@ void PerfQueryGLESNV::FlushOne()
// hardware behavior when drawing triangles.
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);
m_results[entry.query_group].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);

m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
m_query_count.fetch_sub(1, std::memory_order_relaxed);
Expand Down
14 changes: 7 additions & 7 deletions Source/Core/VideoBackends/OGL/OGLPerfQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class PerfQuery : public PerfQueryBase
public:
PerfQuery();
~PerfQuery() {}
void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
Expand All @@ -30,7 +30,7 @@ class PerfQuery : public PerfQueryBase
struct ActiveQuery
{
GLuint query_id;
PerfQueryGroup query_type;
PerfQueryGroup query_group;
};

// when testing in SMS: 64 was too small, 128 was ok
Expand All @@ -52,8 +52,8 @@ class PerfQueryGL : public PerfQuery
PerfQueryGL(GLenum query_type);
~PerfQueryGL();

void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void FlushResults() override;

private:
Expand All @@ -70,8 +70,8 @@ class PerfQueryGLESNV : public PerfQuery
PerfQueryGLESNV();
~PerfQueryGLESNV();

void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void FlushResults() override;

private:
Expand Down
19 changes: 2 additions & 17 deletions Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,6 @@ void StagingBuffer::InvalidateGPUCache(VkCommandBuffer command_buffer,
VkPipelineStageFlagBits dest_pipeline_stage,
VkDeviceSize offset, VkDeviceSize size)
{
VkMemoryPropertyFlags flags = 0;
vmaGetAllocationMemoryProperties(g_vulkan_context->GetMemoryAllocator(), m_alloc, &flags);
if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) [[likely]]
return;

ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, dest_access_flags, offset,
size, VK_PIPELINE_STAGE_HOST_BIT, dest_pipeline_stage);
Expand All @@ -88,25 +83,15 @@ void StagingBuffer::PrepareForGPUWrite(VkCommandBuffer command_buffer,
VkPipelineStageFlagBits dst_pipeline_stage,
VkDeviceSize offset, VkDeviceSize size)
{
VkMemoryPropertyFlags flags = 0;
vmaGetAllocationMemoryProperties(g_vulkan_context->GetMemoryAllocator(), m_alloc, &flags);
if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) [[likely]]
return;

ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
BufferMemoryBarrier(command_buffer, m_buffer, 0, dst_access_flags, offset, size,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dst_pipeline_stage);
BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_MEMORY_WRITE_BIT, dst_access_flags,
offset, size, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, dst_pipeline_stage);
}

void StagingBuffer::FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBits src_access_flags,
VkPipelineStageFlagBits src_pipeline_stage, VkDeviceSize offset,
VkDeviceSize size)
{
VkMemoryPropertyFlags flags = 0;
vmaGetAllocationMemoryProperties(g_vulkan_context->GetMemoryAllocator(), m_alloc, &flags);
if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) [[likely]]
return;

ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
BufferMemoryBarrier(command_buffer, m_buffer, src_access_flags, VK_ACCESS_HOST_READ_BIT, offset,
size, src_pipeline_stage, VK_PIPELINE_STAGE_HOST_BIT);
Expand Down
Loading

0 comments on commit fb8aa97

Please sign in to comment.