35 changes: 24 additions & 11 deletions Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp
Expand Up @@ -35,10 +35,13 @@ bool PerfQuery::Initialize()
return false;
}

// Vulkan requires query pools to be reset after creation
ResetQuery();

return true;
}

void PerfQuery::EnableQuery(PerfQueryGroup type)
void PerfQuery::EnableQuery(PerfQueryGroup group)
{
// Block if there are no free slots.
// Otherwise, try to keep half of them available.
Expand All @@ -50,11 +53,12 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
// a buffer with open queries.
StateTracker::GetInstance()->Bind();

if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
DEBUG_ASSERT(!entry.has_value);
entry.has_value = true;
entry.query_group = group;

// Use precise queries if supported, otherwise boolean (which will be incorrect).
VkQueryControlFlags flags =
Expand All @@ -67,11 +71,14 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
}
}

void PerfQuery::DisableQuery(PerfQueryGroup type)
void PerfQuery::DisableQuery(PerfQueryGroup group)
{
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
{
vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
entry.fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter();

m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
m_query_count.fetch_add(1, std::memory_order_relaxed);
}
Expand Down Expand Up @@ -119,8 +126,10 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)

void PerfQuery::FlushResults()
{
while (!IsFlushed())
if (!IsFlushed())
PartialFlush(true);

ASSERT(IsFlushed());
}

bool PerfQuery::IsFlushed() const
Expand Down Expand Up @@ -185,13 +194,17 @@ void PerfQuery::ReadbackQueries(u32 query_count)
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);

// Read back from the GPU.
VkResult res =
vkGetQueryPoolResults(g_vulkan_context->GetDevice(), m_query_pool, m_query_readback_pos,
query_count, query_count * sizeof(PerfQueryDataType),
m_query_result_buffer.data(), sizeof(PerfQueryDataType), 0);
VkResult res = vkGetQueryPoolResults(
g_vulkan_context->GetDevice(), m_query_pool, m_query_readback_pos, query_count,
query_count * sizeof(PerfQueryDataType), m_query_result_buffer.data(),
sizeof(PerfQueryDataType), VK_QUERY_RESULT_WAIT_BIT);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");

StateTracker::GetInstance()->EndRenderPass();
vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool,
m_query_readback_pos, query_count);

// Remove pending queries.
for (u32 i = 0; i < query_count; i++)
{
Expand All @@ -207,8 +220,8 @@ void PerfQuery::ReadbackQueries(u32 query_count)
const u64 native_res_result = static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
g_renderer->GetTargetWidth() * EFB_HEIGHT /
g_renderer->GetTargetHeight();
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);
m_results[entry.query_group].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);
}

m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
Expand Down
6 changes: 3 additions & 3 deletions Source/Core/VideoBackends/Vulkan/VKPerfQuery.h
Expand Up @@ -22,8 +22,8 @@ class PerfQuery : public PerfQueryBase

bool Initialize();

void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
Expand All @@ -40,7 +40,7 @@ class PerfQuery : public PerfQueryBase
struct ActiveQuery
{
u64 fence_counter;
PerfQueryType query_type;
PerfQueryGroup query_group;
bool has_value;
};

Expand Down