Merge pull request #7905 from stenzek/vulkan-cleanup

Vulkan: Simplify command buffer fence tracking
stenzek committed Mar 29, 2019
2 parents f3fadd7 + 604ab67 commit 377615b06fd61717edb39b51964e611fd2da8cdc
@@ -99,7 +99,7 @@ void BoundingBox::Flush()
     StagingBuffer::BufferMemoryBarrier(
         g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT,
         VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
-        VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
   }

   // We're now up-to-date.
@@ -223,7 +223,7 @@ void BoundingBox::Readback()
   StagingBuffer::BufferMemoryBarrier(
       g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0,
-      BUFFER_SIZE, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
+      BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
   m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(),
                                         VK_ACCESS_TRANSFER_WRITE_BIT,
                                         VK_PIPELINE_STAGE_TRANSFER_BIT);
@@ -237,7 +237,7 @@ void BoundingBox::Readback()
   StagingBuffer::BufferMemoryBarrier(
       g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT,
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
-      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
   m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(),
                                    VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
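All three bounding-box hunks make the same correction: the destination stage of the barrier changes from VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT (or VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT on the readback side) to VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, since the bounding-box SSBO is read and written by the fragment shader and TOP_OF_PIPE performs no memory accesses of its own. As a rough illustration of what StagingBuffer::BufferMemoryBarrier presumably expands to (the helper's body is not part of this diff, so the details below are assumptions), the corrected barrier looks roughly like this:

    // Sketch only: an explicit buffer memory barrier with the corrected stages.
    VkBufferMemoryBarrier barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.buffer = m_gpu_buffer;
    barrier.offset = 0;
    barrier.size = BUFFER_SIZE;

    // The destination stage must be one that actually performs the consuming access,
    // here the fragment shader's SSBO reads/writes.
    vkCmdPipelineBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(),
                         VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                         0, 0, nullptr, 1, &barrier, 0, nullptr);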

@@ -54,7 +54,6 @@ bool CommandBufferManager::CreateCommandBuffers()
   {
     resources.init_command_buffer_used = false;
     resources.semaphore_used = false;
-    resources.needs_fence_wait = false;

     VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0,
                                          g_vulkan_context->GetGraphicsQueueFamilyIndex()};
@@ -211,43 +210,61 @@ void CommandBufferManager::WaitForWorkerThreadIdle()
   m_submit_semaphore.Post();
 }

-void CommandBufferManager::WaitForGPUIdle()
+void CommandBufferManager::WaitForFenceCounter(u64 fence_counter)
 {
-  WaitForWorkerThreadIdle();
-  vkDeviceWaitIdle(g_vulkan_context->GetDevice());
-}
+  if (m_completed_fence_counter >= fence_counter)
+    return;

-void CommandBufferManager::WaitForFence(VkFence fence)
-{
-  // Find the command buffer that this fence corresponds to.
-  u32 command_buffer_index = 0;
-  for (; command_buffer_index < static_cast<u32>(m_frame_resources.size()); command_buffer_index++)
+  // Find the first command buffer which covers this counter value.
+  u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
+  while (index != m_current_frame)
   {
-    if (m_frame_resources[command_buffer_index].fence == fence)
+    if (m_frame_resources[index].fence_counter >= fence_counter)
       break;
+
+    index = (index + 1) % NUM_COMMAND_BUFFERS;
   }
-  ASSERT(command_buffer_index < m_frame_resources.size());

-  // Has this command buffer already been waited for?
-  if (!m_frame_resources[command_buffer_index].needs_fence_wait)
-    return;
+  ASSERT(index != m_current_frame);
+  WaitForCommandBufferCompletion(index);
+}

+void CommandBufferManager::WaitForCommandBufferCompletion(u32 index)
+{
   // Ensure this command buffer has been submitted.
   WaitForWorkerThreadIdle();

   // Wait for this command buffer to be completed.
-  VkResult res =
-      vkWaitForFences(g_vulkan_context->GetDevice(), 1,
-                      &m_frame_resources[command_buffer_index].fence, VK_TRUE, UINT64_MAX);
+  VkResult res = vkWaitForFences(g_vulkan_context->GetDevice(), 1, &m_frame_resources[index].fence,
+                                 VK_TRUE, UINT64_MAX);
   if (res != VK_SUCCESS)
     LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");

-  // Immediately fire callbacks and cleanups, since the commands has been completed.
-  m_frame_resources[command_buffer_index].needs_fence_wait = false;
-  OnCommandBufferExecuted(command_buffer_index);
+  // Clean up any resources for command buffers between the last known completed buffer and this
+  // now-completed command buffer. If we use >2 buffers, this may be more than one buffer.
+  const u64 now_completed_counter = m_frame_resources[index].fence_counter;
+  u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
+  while (cleanup_index != m_current_frame)
+  {
+    FrameResources& resources = m_frame_resources[cleanup_index];
+    if (resources.fence_counter > now_completed_counter)
+      break;
+
+    if (resources.fence_counter > m_completed_fence_counter)
+    {
+      for (auto& it : resources.cleanup_resources)
+        it();
+      resources.cleanup_resources.clear();
+    }
+
+    cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS;
+  }
+
+  m_completed_fence_counter = now_completed_counter;
 }

 void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
+                                               bool wait_for_completion,
                                                VkSwapchainKHR present_swap_chain,
                                                uint32_t present_image_index)
 {
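WaitForFenceCounter() together with GetCurrentFenceCounter() replaces the old WaitForFence(VkFence) and fence-callback path: instead of handing out VkFence handles, callers remember the monotonically increasing counter of the command buffer their work was recorded into and wait on that number later. A hedged sketch of the intended caller-side pattern (the surrounding readback code is hypothetical; only the manager functions come from this change):

    // Record a GPU->CPU copy, then remember the counter of the command buffer
    // it was recorded into. The counter must be read before submitting, since
    // BeginCommandBuffer() assigns a new counter to the next buffer.
    const u64 copy_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
    g_command_buffer_mgr->SubmitCommandBuffer(true, false);

    // ...later, before touching the readback data on the CPU:
    if (g_command_buffer_mgr->GetCompletedFenceCounter() < copy_counter)
      g_command_buffer_mgr->WaitForFenceCounter(copy_counter);

The explicit check is optional, since WaitForFenceCounter() already early-outs when the counter has completed.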
@@ -263,16 +280,13 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
     }
   }

-  // This command buffer now has commands, so can't be re-used without waiting.
-  resources.needs_fence_wait = true;
-
   // Grab the semaphore before submitting command buffer either on-thread or off-thread.
   // This prevents a race from occurring where a second command buffer is executed
   // before the worker thread has woken and executed the first one yet.
   m_submit_semaphore.Wait();

   // Submitting off-thread?
-  if (m_use_threaded_submission && submit_on_worker_thread)
+  if (m_use_threaded_submission && submit_on_worker_thread && !wait_for_completion)
   {
     // Push to the pending submit queue.
     {
@@ -287,6 +301,8 @@
   {
     // Pass through to normal submission path.
     SubmitCommandBuffer(m_current_frame, present_swap_chain, present_image_index);
+    if (wait_for_completion)
+      WaitForCommandBufferCompletion(m_current_frame);
   }

   // Switch to next cmdbuffer.
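The new wait_for_completion parameter covers the fully synchronous case in a single call. Threaded submission is skipped when a wait is requested (the added && !wait_for_completion above), so the fence is waited on from the calling thread. A short, hedged usage sketch of the two call patterns, using only the signature declared in this diff:

    // Asynchronous path: queue the submit on the worker thread and keep going.
    g_command_buffer_mgr->SubmitCommandBuffer(true, false);

    // Synchronous path (e.g. CPU readbacks): submit on this thread and block until
    // the command buffer's fence signals via WaitForCommandBufferCompletion().
    g_command_buffer_mgr->SubmitCommandBuffer(false, true);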
@@ -365,39 +381,15 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
   m_submit_semaphore.Post();
 }

-void CommandBufferManager::OnCommandBufferExecuted(u32 index)
-{
-  FrameResources& resources = m_frame_resources[index];
-
-  // Fire fence tracking callbacks.
-  for (auto iter = m_fence_callbacks.begin(); iter != m_fence_callbacks.end();)
-  {
-    auto backup_iter = iter++;
-    backup_iter->second(resources.fence);
-  }
-
-  // Clean up all objects pending destruction on this command buffer
-  for (auto& it : resources.cleanup_resources)
-    it();
-  resources.cleanup_resources.clear();
-}
-
 void CommandBufferManager::BeginCommandBuffer()
 {
   // Move to the next command buffer.
-  m_current_frame = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
-  FrameResources& resources = m_frame_resources[m_current_frame];
+  const u32 next_buffer_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
+  FrameResources& resources = m_frame_resources[next_buffer_index];

   // Wait for the GPU to finish with all resources for this command buffer.
-  if (resources.needs_fence_wait)
-  {
-    VkResult res =
-        vkWaitForFences(g_vulkan_context->GetDevice(), 1, &resources.fence, true, UINT64_MAX);
-    if (res != VK_SUCCESS)
-      LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
-
-    OnCommandBufferExecuted(m_current_frame);
-  }
+  if (resources.fence_counter > m_completed_fence_counter)
+    WaitForCommandBufferCompletion(next_buffer_index);

   // Reset fence to unsignaled before starting.
   VkResult res = vkResetFences(g_vulkan_context->GetDevice(), 1, &resources.fence);
@@ -427,6 +419,8 @@
   // Reset upload command buffer state
   resources.init_command_buffer_used = false;
   resources.semaphore_used = false;
+  resources.fence_counter = m_next_fence_counter++;
+  m_current_frame = next_buffer_index;
 }

 void CommandBufferManager::DeferBufferDestruction(VkBuffer object)
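BeginCommandBuffer() now assigns the next counter when a buffer starts recording and only advances m_current_frame at the very end, so the ring scan in WaitForFenceCounter() (which starts at m_current_frame + 1 and never inspects the buffer still being recorded) stays consistent. As the cleanup loop's comment notes, with more than two buffers in flight a single fence wait can retire several older buffers at once. A small, self-contained toy model of that ring scan (illustration only, not Dolphin code):

    // Toy model of the fence-counter ring scan with three buffers.
    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main()
    {
      constexpr std::size_t num_buffers = 3;
      const std::array<std::uint64_t, num_buffers> fence_counters{4, 5, 6};
      const std::size_t current = 2;   // buffer 2 is still being recorded (counter 6)
      std::uint64_t completed = 3;     // everything up to counter 3 is known finished

      const std::uint64_t wanted = 5;  // wait for work recorded under counter 5
      std::size_t index = (current + 1) % num_buffers;
      while (index != current && fence_counters[index] < wanted)
        index = (index + 1) % num_buffers;

      assert(index != current);        // mirrors the ASSERT in WaitForFenceCounter
      // Waiting on buffer 'index' (here: buffer 1) retires counters 4 and 5 in one go.
      completed = fence_counters[index];
      assert(completed == 5);
      return 0;
    }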
@@ -471,19 +465,5 @@ void CommandBufferManager::DeferImageViewDestruction(VkImageView object)
       [object]() { vkDestroyImageView(g_vulkan_context->GetDevice(), object, nullptr); });
 }

-void CommandBufferManager::AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback)
-{
-  // Shouldn't be adding twice.
-  ASSERT(m_fence_callbacks.find(key) == m_fence_callbacks.end());
-  m_fence_callbacks.emplace(key, std::move(callback));
-}
-
-void CommandBufferManager::RemoveFenceSignaledCallback(const void* key)
-{
-  auto iter = m_fence_callbacks.find(key);
-  ASSERT(iter != m_fence_callbacks.end());
-  m_fence_callbacks.erase(iter);
-}
-
 std::unique_ptr<CommandBufferManager> g_command_buffer_mgr;
 } // namespace Vulkan
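Deferred destruction ties into the same counters. DeferBufferDestruction() and friends (whose bodies are not shown here, apart from the image-view lambda above) presumably queue a small lambda on the current command buffer's cleanup_resources list, which the cleanup loop in WaitForCommandBufferCompletion() runs once that buffer's fence counter has been retired. A hedged caller-side sketch, with a hypothetical helper for obtaining the stale handle:

    // Retire a VkBuffer that in-flight command buffers may still reference.
    VkBuffer old_buffer = AcquireReplacedBuffer();  // hypothetical helper, not in this diff
    g_command_buffer_mgr->DeferBufferDestruction(old_buffer);
    // The matching vkDestroyBuffer runs later, inside WaitForCommandBufferCompletion(),
    // once the fence counter of the command buffer that last used it has completed.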
@@ -51,9 +51,15 @@ class CommandBufferManager
   // Allocates a descriptors set from the pool reserved for the current frame.
   VkDescriptorSet AllocateDescriptorSet(VkDescriptorSetLayout set_layout);

+  // Fence "counters" are used to track which commands have been completed by the GPU.
+  // If the last completed fence counter is greater or equal to N, it means that the work
+  // associated with counter N has been completed by the GPU. The value of N to associate with
+  // commands can be retrieved by calling GetCurrentFenceCounter().
+  u64 GetCompletedFenceCounter() const { return m_completed_fence_counter; }
+
   // Gets the fence that will be signaled when the currently executing command buffer is
   // queued and executed. Do not wait for this fence before the buffer is executed.
-  VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; }
+  u64 GetCurrentFenceCounter() const { return m_frame_resources[m_current_frame].fence_counter; }

   // Returns the semaphore for the current command buffer, which can be used to ensure the
   // swap chain image is ready before the command buffer executes.
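The new header comment spells out the contract: work recorded while GetCurrentFenceCounter() returns N is finished once GetCompletedFenceCounter() >= N. A hedged sketch of how a pooled resource might use this to avoid blocking (the wrapper type is hypothetical; only the two getters come from this diff):

    // Hypothetical resource wrapper tracking when its last GPU use retires.
    struct TrackedResource
    {
      u64 last_use_counter = 0;

      void RecordUse() { last_use_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); }

      bool IsSafeToReuse() const
      {
        return g_command_buffer_mgr->GetCompletedFenceCounter() >= last_use_counter;
      }
    };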
@@ -66,15 +72,11 @@ class CommandBufferManager
   // Ensure that the worker thread has submitted any previous command buffers and is idle.
   void WaitForWorkerThreadIdle();

-  // Ensure that the worker thread has both submitted all commands, and the GPU has caught up.
-  // Use with caution, huge performance penalty.
-  void WaitForGPUIdle();
-
   // Wait for a fence to be completed.
   // Also invokes callbacks for completion.
-  void WaitForFence(VkFence fence);
+  void WaitForFenceCounter(u64 fence_counter);

-  void SubmitCommandBuffer(bool submit_on_worker_thread,
+  void SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion,
                            VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE,
                            uint32_t present_image_index = 0xFFFFFFFF);

@@ -90,25 +92,17 @@ class CommandBufferManager
   void DeferImageDestruction(VkImage object);
   void DeferImageViewDestruction(VkImageView object);

-  // Instruct the manager to fire the specified callback when a fence is flagged to be signaled.
-  // This happens when command buffers are executed, and can be tested if signaled, which means
-  // that all commands up to the point when the callback was fired have completed.
-  using FenceSignaledCallback = std::function<void(VkFence)>;
-  void AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback);
-  void RemoveFenceSignaledCallback(const void* key);
-
 private:
   bool CreateCommandBuffers();
   void DestroyCommandBuffers();

   bool CreateSubmitThread();

+  void WaitForCommandBufferCompletion(u32 command_buffer_index);
   void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
                            u32 present_image_index);
   void BeginCommandBuffer();

-  void OnCommandBufferExecuted(u32 index);
-
   struct FrameResources
   {
     // [0] - Init (upload) command buffer, [1] - draw command buffer
@@ -117,19 +111,19 @@ class CommandBufferManager
     VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
     VkFence fence = VK_NULL_HANDLE;
     VkSemaphore semaphore = VK_NULL_HANDLE;
+    u64 fence_counter = 0;
     bool init_command_buffer_used = false;
     bool semaphore_used = false;
-    bool needs_fence_wait = false;

     std::vector<std::function<void()>> cleanup_resources;
   };

+  u64 m_next_fence_counter = 1;
+  u64 m_completed_fence_counter = 0;
+
   std::array<FrameResources, NUM_COMMAND_BUFFERS> m_frame_resources;
   u32 m_current_frame;

-  // callbacks when a fence point is set
-  std::map<const void*, FenceSignaledCallback> m_fence_callbacks;
-
   // Threaded command buffer execution
   // Semaphore determines when a command buffer can be queued
   Common::Semaphore m_submit_semaphore;
