Skip to content

Commit

Permalink
Add buffer_copy method to RenderingDevice interface and an implementa…
Browse files Browse the repository at this point in the history
…tion for the Vulkan driver.

Direct buffer copies are required to perform certain operations more efficiently, as the only current alternative is to download the buffer to the CPU and upload it again. As the first use case, the new function is used when enabling motion vectors on multimeshes.
  • Loading branch information
DarioSamo committed Aug 12, 2023
1 parent 4714e95 commit 0d7deca
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 6 deletions.
58 changes: 58 additions & 0 deletions drivers/vulkan/rendering_device_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5905,6 +5905,64 @@ void RenderingDeviceVulkan::uniform_set_set_invalidation_callback(RID p_uniform_
us->invalidated_callback_userdata = p_userdata;
}

Error RenderingDeviceVulkan::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier) {
_THREAD_SAFE_METHOD_

ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER,
"Copying buffers is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER,
"Copying buffers is forbidden during creation of a compute list");

// This method assumes the barriers have been pushed prior to being called, therefore no barriers are pushed
// for the source or destination buffers before performing the copy. These masks are effectively ignored.
VkPipelineShaderStageCreateFlags src_stage_mask = 0;
VkAccessFlags src_access_mask = 0;
Buffer *src_buffer = _get_buffer_from_owner(p_src_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_NO_BARRIER);
if (!src_buffer) {
ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Source buffer argument is not a valid buffer of any type.");
}

VkPipelineStageFlags dst_stage_mask = 0;
VkAccessFlags dst_access = 0;
if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) {
// If the post barrier mask defines it, we indicate the destination buffer will require a barrier with these flags set
// after the copy command is queued.
dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
}

Buffer *dst_buffer = _get_buffer_from_owner(p_dst_buffer, dst_stage_mask, dst_access, p_post_barrier);
if (!dst_buffer) {
ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Destination buffer argument is not a valid buffer of any type.");
}

// Validate the copy's dimensions for both buffers.
ERR_FAIL_COND_V_MSG((p_size + p_src_offset) > src_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the source buffer.");
ERR_FAIL_COND_V_MSG((p_size + p_dst_offset) > dst_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the destination buffer.");

// Perform the copy.
VkBufferCopy region;
region.srcOffset = p_src_offset;
region.dstOffset = p_dst_offset;
region.size = p_size;
vkCmdCopyBuffer(frames[frame].draw_command_buffer, src_buffer->buffer, dst_buffer->buffer, 1, &region);

#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
if (dst_stage_mask == 0) {
dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}

// As indicated by the post barrier mask, push a new barrier.
if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) {
_buffer_memory_barrier(dst_buffer->buffer, p_dst_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true);
}
#endif

return OK;
}

Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier) {
_THREAD_SAFE_METHOD_

Expand Down
1 change: 1 addition & 0 deletions drivers/vulkan/rendering_device_vulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1152,6 +1152,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
virtual bool uniform_set_is_valid(RID p_uniform_set);
virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata);

virtual Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS);
virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); // Works for any buffer.
virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS);
virtual Vector<uint8_t> buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0);
Expand Down
9 changes: 3 additions & 6 deletions servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1309,13 +1309,10 @@ void MeshStorage::_multimesh_enable_motion_vectors(MultiMesh *multimesh) {
RID new_buffer = RD::get_singleton()->storage_buffer_create(new_buffer_size);

if (multimesh->buffer_set && multimesh->data_cache.is_empty()) {
// If the buffer was set but there's no data cached in the CPU, we must download it from the GPU and
// upload it because RD does not provide a way to copy the buffer directly yet.
// If the buffer was set but there's no data cached in the CPU, we copy the buffer directly on the GPU.
RD::get_singleton()->barrier();
Vector<uint8_t> buffer_data = RD::get_singleton()->buffer_get_data(multimesh->buffer);
ERR_FAIL_COND(buffer_data.size() != int(buffer_size));
RD::get_singleton()->buffer_update(new_buffer, 0, buffer_size, buffer_data.ptr(), RD::BARRIER_MASK_NO_BARRIER);
RD::get_singleton()->buffer_update(new_buffer, buffer_size, buffer_size, buffer_data.ptr());
RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, 0, buffer_size, RD::BARRIER_MASK_NO_BARRIER);
RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, buffer_size, buffer_size);
} else if (!multimesh->data_cache.is_empty()) {
// Simply upload the data cached in the CPU, which should already be doubled in size.
ERR_FAIL_COND(multimesh->data_cache.size() != int(new_buffer_size));
Expand Down
1 change: 1 addition & 0 deletions servers/rendering/rendering_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,7 @@ class RenderingDevice : public Object {
virtual bool uniform_set_is_valid(RID p_uniform_set) = 0;
virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata) = 0;

virtual Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0;
virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0;
virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0;
virtual Vector<uint8_t> buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0) = 0; // This causes stall, only use to retrieve large buffers for saving.
Expand Down

0 comments on commit 0d7deca

Please sign in to comment.