Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Reduce branching in TrackedRenderPass #7053

Closed
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 37 additions & 46 deletions crates/bevy_core_pipeline/src/bloom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use bevy_render::{
},
prelude::Camera,
render_graph::{Node, NodeRunError, RenderGraph, RenderGraphContext, SlotInfo, SlotType},
render_phase::TrackedRenderPass,
render_resource::*,
renderer::{RenderContext, RenderDevice},
texture::{CachedTexture, TextureCache},
Expand Down Expand Up @@ -232,17 +231,15 @@ impl Node for BloomNode {
{
let view = &BloomTextures::texture_view(&textures.texture_a, 0);
let mut prefilter_pass =
TrackedRenderPass::new(render_context.command_encoder.begin_render_pass(
&RenderPassDescriptor {
label: Some("bloom_prefilter_pass"),
color_attachments: &[Some(RenderPassColorAttachment {
view,
resolve_target: None,
ops: Operations::default(),
})],
depth_stencil_attachment: None,
},
));
render_context.begin_tracked_render_pass(RenderPassDescriptor {
label: Some("bloom_prefilter_pass"),
color_attachments: &[Some(RenderPassColorAttachment {
view,
resolve_target: None,
ops: Operations::default(),
})],
depth_stencil_attachment: None,
});
prefilter_pass.set_render_pipeline(downsampling_prefilter_pipeline);
prefilter_pass.set_bind_group(
0,
Expand All @@ -258,17 +255,15 @@ impl Node for BloomNode {
for mip in 1..textures.mip_count {
let view = &BloomTextures::texture_view(&textures.texture_a, mip);
let mut downsampling_pass =
TrackedRenderPass::new(render_context.command_encoder.begin_render_pass(
&RenderPassDescriptor {
label: Some("bloom_downsampling_pass"),
color_attachments: &[Some(RenderPassColorAttachment {
view,
resolve_target: None,
ops: Operations::default(),
})],
depth_stencil_attachment: None,
},
));
render_context.begin_tracked_render_pass(RenderPassDescriptor {
label: Some("bloom_downsampling_pass"),
color_attachments: &[Some(RenderPassColorAttachment {
view,
resolve_target: None,
ops: Operations::default(),
})],
depth_stencil_attachment: None,
});
downsampling_pass.set_render_pipeline(downsampling_pipeline);
downsampling_pass.set_bind_group(
0,
Expand All @@ -284,17 +279,15 @@ impl Node for BloomNode {
for mip in (1..textures.mip_count).rev() {
let view = &BloomTextures::texture_view(&textures.texture_b, mip - 1);
let mut upsampling_pass =
TrackedRenderPass::new(render_context.command_encoder.begin_render_pass(
&RenderPassDescriptor {
label: Some("bloom_upsampling_pass"),
color_attachments: &[Some(RenderPassColorAttachment {
view,
resolve_target: None,
ops: Operations::default(),
})],
depth_stencil_attachment: None,
},
));
render_context.begin_tracked_render_pass(RenderPassDescriptor {
label: Some("bloom_upsampling_pass"),
color_attachments: &[Some(RenderPassColorAttachment {
view,
resolve_target: None,
ops: Operations::default(),
})],
depth_stencil_attachment: None,
});
upsampling_pass.set_render_pipeline(upsampling_pipeline);
upsampling_pass.set_bind_group(
0,
Expand All @@ -309,18 +302,16 @@ impl Node for BloomNode {

{
let mut upsampling_final_pass =
TrackedRenderPass::new(render_context.command_encoder.begin_render_pass(
&RenderPassDescriptor {
label: Some("bloom_upsampling_final_pass"),
color_attachments: &[Some(view_target.get_unsampled_color_attachment(
Operations {
load: LoadOp::Load,
store: true,
},
))],
depth_stencil_attachment: None,
},
));
render_context.begin_tracked_render_pass(RenderPassDescriptor {
label: Some("bloom_upsampling_final_pass"),
color_attachments: &[Some(view_target.get_unsampled_color_attachment(
Operations {
load: LoadOp::Load,
store: true,
},
))],
depth_stencil_attachment: None,
});
upsampling_final_pass.set_render_pipeline(upsampling_final_pipeline);
upsampling_final_pass.set_bind_group(
0,
Expand Down
27 changes: 14 additions & 13 deletions crates/bevy_render/src/render_phase/draw_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ use crate::{
BindGroup, BindGroupId, Buffer, BufferId, BufferSlice, RenderPipeline, RenderPipelineId,
ShaderStages,
},
renderer::RenderDevice,
};
use bevy_utils::tracing::trace;
use bevy_utils::{default, tracing::trace};
use std::ops::Range;
use wgpu::{IndexFormat, RenderPass};

/// Tracks the current [`TrackedRenderPass`] state to ensure draw calls are valid.
#[derive(Debug, Default)]
pub struct DrawState {
struct DrawState {
pipeline: Option<RenderPipelineId>,
bind_groups: Vec<(Option<BindGroupId>, Vec<u32>)>,
vertex_buffers: Vec<Option<(BufferId, u64)>>,
Expand All @@ -26,12 +27,10 @@ impl DrawState {
bind_group: BindGroupId,
dynamic_indices: &[u32],
) {
if index >= self.bind_groups.len() {
self.bind_groups.resize(index + 1, (None, Vec::new()));
}
self.bind_groups[index].0 = Some(bind_group);
self.bind_groups[index].1.clear();
self.bind_groups[index].1.extend(dynamic_indices);
let group = &mut self.bind_groups[index];
group.0 = Some(bind_group);
group.1.clear();
group.1.extend(dynamic_indices);
}

pub fn is_bind_group_set(
Expand All @@ -48,9 +47,6 @@ impl DrawState {
}

pub fn set_vertex_buffer(&mut self, index: usize, buffer: BufferId, offset: u64) {
if index >= self.vertex_buffers.len() {
self.vertex_buffers.resize(index + 1, None);
}
self.vertex_buffers[index] = Some((buffer, offset));
}

Expand Down Expand Up @@ -98,9 +94,14 @@ pub struct TrackedRenderPass<'a> {

impl<'a> TrackedRenderPass<'a> {
/// Tracks the supplied render pass.
pub fn new(pass: RenderPass<'a>) -> Self {
pub fn new(device: &RenderDevice, pass: RenderPass<'a>) -> Self {
let limits = device.limits();
Self {
state: DrawState::default(),
state: DrawState {
Copy link
Contributor

@IceSentry IceSentry Jan 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may be missing something, but does this means we always allocate a big buffer for every render pass? Is it small enough that it doesn't matter. The previous impl looked like it would just allocate memory when necessary, if you only need 2-3 bind groups this seems wasteful.

Is it just that the speed up is worth the small memory increase?

Copy link
Member Author

@james7132 james7132 Jan 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The latter. The actual size of these Vecs aren't that big (1-2KB at most), and is a static upfront cost compared to the scaling cost of branching on every RenderCommand (not just every entity) that uses it.

The actual limits here are also usually quite small. 16 for both vertex buffers and bind groups, IIRC.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, makes sense then, might be worth adding a small note in a comment I guess, but I'll approve anyway

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The actual limits here are also usually quite small. 16 for both vertex buffers and bind groups, IIRC.

Some devices may report u32::MAX for those limits instead of an actual value, #6841 for example for max_storage_buffers_per_shader_stage. I don't know if that's also the case for max_bind_groups or max_vertex_buffers but that's a possibility

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After talking about this with cwfitzgerald about this on Discord, it seems like the limits for vertex buffers and bind groups are generally pretty low (max a few hundred). It seems like the limit is hard set in wgpu-hal mostly to avoid reallocations from Vecs.

In the worst case, we should set a reasonable upper limit ourselves (i.e. 1024) which we use min with the Device provided limits to be 100% sure it's not overallocating.

Original discussion: https://discord.com/channels/691052431525675048/743663924229963868/1059900749636702309

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, after reading the discussion I had the same conclusion.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ended up using wgpu-hal's public constants MAX_VERTEX_BUFFERS and MAX_BIND_GROUPS so we're not keeping them out of sync.

bind_groups: vec![(None, Vec::new()); limits.max_bind_groups as usize],
vertex_buffers: vec![None; limits.max_vertex_buffers as usize],
..default()
},
pass,
}
}
Expand Down
5 changes: 1 addition & 4 deletions crates/bevy_render/src/render_phase/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,7 @@ impl<I: PhaseItem> RenderPhase<I> {
viewport: Option<&Viewport>,
pass_descriptor: RenderPassDescriptor,
) {
let render_pass = render_context
.command_encoder
.begin_render_pass(&pass_descriptor);
let mut render_pass = TrackedRenderPass::new(render_pass);
let mut render_pass = render_context.begin_tracked_render_pass(pass_descriptor);

if let Some(viewport) = viewport {
render_pass.set_camera_viewport(viewport);
Expand Down
18 changes: 8 additions & 10 deletions crates/bevy_render/src/render_resource/resource_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ macro_rules! render_resource_wrapper {
macro_rules! define_atomic_id {
($atomic_id_type:ident) => {
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct $atomic_id_type(u32);
pub struct $atomic_id_type(core::num::NonZeroU32);

// We use new instead of default to indicate that each ID created will be unique.
#[allow(clippy::new_without_default)]
Expand All @@ -134,15 +134,13 @@ macro_rules! define_atomic_id {

static COUNTER: AtomicU32 = AtomicU32::new(1);

match COUNTER.fetch_add(1, Ordering::Relaxed) {
0 => {
panic!(
"The system ran out of unique `{}`s.",
stringify!($atomic_id_type)
);
}
id => Self(id),
}
let counter = COUNTER.fetch_add(1, Ordering::Relaxed);
Self(core::num::NonZeroU32::new(counter).unwrap_or_else(|| {
panic!(
"The system ran out of unique `{}`s.",
stringify!($atomic_id_type)
);
}))
}
}
};
Expand Down
16 changes: 16 additions & 0 deletions crates/bevy_render/src/renderer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ pub use render_device::*;

use crate::{
render_graph::RenderGraph,
render_phase::TrackedRenderPass,
render_resource::RenderPassDescriptor,
settings::{WgpuSettings, WgpuSettingsPriority},
view::{ExtractedWindows, ViewTarget},
};
Expand Down Expand Up @@ -279,3 +281,17 @@ pub struct RenderContext {
pub render_device: RenderDevice,
pub command_encoder: CommandEncoder,
}

impl RenderContext {
/// Creates a new [`TrackedRenderPass`] for the context,
/// configured using the provided `descriptor`.
pub fn begin_tracked_render_pass<'a>(
&'a mut self,
descriptor: RenderPassDescriptor<'a, '_>,
) -> TrackedRenderPass<'a> {
TrackedRenderPass::new(
&self.render_device,
self.command_encoder.begin_render_pass(&descriptor),
)
}
}