From 45fbacc17f24b83b29fab9b3c1e1472d32edd4f6 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 23 Aug 2025 23:21:54 -0500 Subject: [PATCH 01/35] Wait did I break it --- wgpu-hal/src/metal/adapter.rs | 12 +++++++++--- wgpu-hal/src/metal/command.rs | 32 ++++++++++++++++++++++++++------ wgpu-hal/src/metal/device.rs | 28 +++++++++++++--------------- wgpu-hal/src/metal/mod.rs | 21 ++++++++++++++------- 4 files changed, 62 insertions(+), 31 deletions(-) diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 02dfc0fe601..9517f0b4dd6 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -606,6 +606,8 @@ impl super::PrivateCapabilities { } let argument_buffers = device.argument_buffers_support(); + let mesh_shaders = device.supports_family(MTLGPUFamily::Apple7) + || device.supports_family(MTLGPUFamily::Mac2); Self { family_check, @@ -902,6 +904,7 @@ impl super::PrivateCapabilities { && (device.supports_family(MTLGPUFamily::Apple7) || device.supports_family(MTLGPUFamily::Mac2)), supports_shared_event: version.at_least((10, 14), (12, 0), os_is_mac), + mesh_shaders, } } @@ -1003,6 +1006,8 @@ impl super::PrivateCapabilities { features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); } + features.set(F::EXPERIMENTAL_MESH_SHADER, self.mesh_shaders); + features } @@ -1079,10 +1084,11 @@ impl super::PrivateCapabilities { max_buffer_size: self.max_buffer_size, max_non_sampler_bindings: u32::MAX, - max_task_workgroup_total_count: 0, - max_task_workgroups_per_dimension: 0, + // See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf, Maximum threadgroups per mesh shader grid + max_task_workgroup_total_count: 1024, + max_task_workgroups_per_dimension: 1024, max_mesh_multiview_count: 0, - max_mesh_output_layers: 0, + max_mesh_output_layers: self.max_texture_layers as u32, max_blas_primitive_count: 0, // When added: 2^28 from https://developer.apple.com/documentation/metal/mtlaccelerationstructureusage/extendedlimits max_blas_geometry_count: 0, // When added: 2^24 diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 72a799a0275..2b66343c478 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -906,11 +906,22 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { self.state.raw_primitive_type = pipeline.raw_primitive_type; - self.state.stage_infos.vs.assign_from(&pipeline.vs_info); + match pipeline.vs_info { + Some(ref info) => self.state.stage_infos.vs.assign_from(info), + None => self.state.stage_infos.vs.clear(), + } match pipeline.fs_info { Some(ref info) => self.state.stage_infos.fs.assign_from(info), None => self.state.stage_infos.fs.clear(), } + match pipeline.ts_info { + Some(ref info) => self.state.stage_infos.ts.assign_from(info), + None => self.state.stage_infos.vs.clear(), + } + match pipeline.ms_info { + Some(ref info) => self.state.stage_infos.ms.assign_from(info), + None => self.state.stage_infos.fs.clear(), + } let encoder = self.state.render.as_ref().unwrap(); encoder.set_render_pipeline_state(&pipeline.raw); @@ -937,7 +948,7 @@ impl crate::CommandEncoder for super::CommandEncoder { ); } } - if pipeline.fs_lib.is_some() { + if pipeline.fs_info.is_some() { if let Some((index, sizes)) = self .state .make_sizes_buffer_update(naga::ShaderStage::Fragment, &mut self.temp.binding_sizes) @@ -1111,11 +1122,20 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn draw_mesh_tasks( &mut self, - _group_count_x: u32, - _group_count_y: u32, - _group_count_z: u32, + group_count_x: u32, + group_count_y: u32, + group_count_z: u32, ) { - unreachable!() + let encoder = self.state.render.as_ref().unwrap(); + encoder.draw_mesh_threadgroups( + MTLSize { + width: group_count_x as u64, + height: group_count_y as u64, + depth: group_count_z as u64, + }, + todo!(), + todo!(), + ); } unsafe fn draw_indirect( diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 6af8ad3062d..97878960a36 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1078,7 +1078,7 @@ impl crate::Device for super::Device { conv::map_primitive_topology(desc.primitive.topology); // Vertex shader - let (vs_lib, vs_info) = { + let vs_info = { let mut vertex_buffer_mappings = Vec::::new(); for (i, vbl) in desc_vertex_buffers.iter().enumerate() { let mut attributes = Vec::::new(); @@ -1124,18 +1124,17 @@ impl crate::Device for super::Device { ); } - let info = super::PipelineStageInfo { + super::PipelineStageInfo { push_constants: desc.layout.push_constants_infos.vs, sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, sized_bindings: vs.sized_bindings, vertex_buffer_mappings, - }; - - (vs.library, info) + library: Some(vs.library), + } }; // Fragment shader - let (fs_lib, fs_info) = match desc.fragment_stage { + let fs_info = match desc.fragment_stage { Some(ref stage) => { let fs = self.load_shader( stage, @@ -1153,14 +1152,13 @@ impl crate::Device for super::Device { ); } - let info = super::PipelineStageInfo { + Some(super::PipelineStageInfo { push_constants: desc.layout.push_constants_infos.fs, sizes_slot: desc.layout.per_stage_map.fs.sizes_buffer, sized_bindings: fs.sized_bindings, vertex_buffer_mappings: vec![], - }; - - (Some(fs.library), Some(info)) + library: Some(fs.library), + }) } None => { // TODO: This is a workaround for what appears to be a Metal validation bug @@ -1168,7 +1166,7 @@ impl crate::Device for super::Device { if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { descriptor.set_depth_attachment_pixel_format(MTLPixelFormat::Depth32Float); } - (None, None) + None } }; @@ -1302,10 +1300,10 @@ impl crate::Device for super::Device { Ok(super::RenderPipeline { raw, - vs_lib, - fs_lib, - vs_info, + vs_info: Some(vs_info), fs_info, + ts_info: None, + ms_info: None, raw_primitive_type, raw_triangle_fill_mode, raw_front_winding: conv::map_winding(desc.primitive.front_face), @@ -1373,6 +1371,7 @@ impl crate::Device for super::Device { } let cs_info = super::PipelineStageInfo { + library: Some(cs.library), push_constants: desc.layout.push_constants_infos.cs, sizes_slot: desc.layout.per_stage_map.cs.sizes_buffer, sized_bindings: cs.sized_bindings, @@ -1400,7 +1399,6 @@ impl crate::Device for super::Device { Ok(super::ComputePipeline { raw, cs_info, - cs_lib: cs.library, work_group_size: cs.wg_size, work_group_memory_sizes: cs.wg_memory_sizes, }) diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 00223b2f778..ec4ae11cdef 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -300,6 +300,7 @@ struct PrivateCapabilities { int64_atomics: bool, float_atomics: bool, supports_shared_event: bool, + mesh_shaders: bool, } #[derive(Clone, Debug)] @@ -604,12 +605,16 @@ struct MultiStageData { vs: T, fs: T, cs: T, + ts: T, + ms: T, } const NAGA_STAGES: MultiStageData = MultiStageData { vs: naga::ShaderStage::Vertex, fs: naga::ShaderStage::Fragment, cs: naga::ShaderStage::Compute, + ts: naga::ShaderStage::Task, + ms: naga::ShaderStage::Mesh, }; impl ops::Index for MultiStageData { @@ -630,6 +635,8 @@ impl MultiStageData { vs: fun(&self.vs), fs: fun(&self.fs), cs: fun(&self.cs), + ts: fun(&self.ts), + ms: fun(&self.ms), } } fn map(self, fun: impl Fn(T) -> Y) -> MultiStageData { @@ -637,6 +644,8 @@ impl MultiStageData { vs: fun(self.vs), fs: fun(self.fs), cs: fun(self.cs), + ts: fun(self.ts), + ms: fun(self.ms), } } fn iter<'a>(&'a self) -> impl Iterator { @@ -811,6 +820,8 @@ impl crate::DynShaderModule for ShaderModule {} #[derive(Debug, Default)] struct PipelineStageInfo { + #[allow(dead_code)] + library: Option, push_constants: Option, /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes. @@ -849,12 +860,10 @@ impl PipelineStageInfo { #[derive(Debug)] pub struct RenderPipeline { raw: metal::RenderPipelineState, - #[allow(dead_code)] - vs_lib: metal::Library, - #[allow(dead_code)] - fs_lib: Option, - vs_info: PipelineStageInfo, + vs_info: Option, fs_info: Option, + ts_info: Option, + ms_info: Option, raw_primitive_type: MTLPrimitiveType, raw_triangle_fill_mode: MTLTriangleFillMode, raw_front_winding: MTLWinding, @@ -871,8 +880,6 @@ impl crate::DynRenderPipeline for RenderPipeline {} #[derive(Debug)] pub struct ComputePipeline { raw: metal::ComputePipelineState, - #[allow(dead_code)] - cs_lib: metal::Library, cs_info: PipelineStageInfo, work_group_size: MTLSize, work_group_memory_sizes: Vec, From 611c01a4566a3e6bb48dbf599df063db2b2b6449 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 00:11:50 -0500 Subject: [PATCH 02/35] More work --- wgpu-hal/src/metal/command.rs | 59 +++-- wgpu-hal/src/metal/device.rs | 393 +++++++++++++++++++++------------- wgpu-hal/src/metal/mod.rs | 5 +- 3 files changed, 299 insertions(+), 158 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 2b66343c478..37beb41a9a3 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -21,7 +21,6 @@ impl Default for super::CommandState { compute: None, raw_primitive_type: MTLPrimitiveType::Point, index: None, - raw_wg_size: MTLSize::new(0, 0, 0), stage_infos: Default::default(), storage_buffer_length_map: Default::default(), vertex_buffer_size_map: Default::default(), @@ -936,7 +935,7 @@ impl crate::CommandEncoder for super::CommandEncoder { encoder.set_depth_bias(bias.constant as f32, bias.slope_scale, bias.clamp); } - { + if pipeline.vs_info.is_some() { if let Some((index, sizes)) = self .state .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) @@ -960,6 +959,30 @@ impl crate::CommandEncoder for super::CommandEncoder { ); } } + if pipeline.ts_info.is_some() { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Task, &mut self.temp.binding_sizes) + { + encoder.set_object_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr().cast(), + ); + } + } + if pipeline.ms_info.is_some() { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Mesh, &mut self.temp.binding_sizes) + { + encoder.set_mesh_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr().cast(), + ); + } + } } unsafe fn set_index_buffer<'a>( @@ -1133,8 +1156,8 @@ impl crate::CommandEncoder for super::CommandEncoder { height: group_count_y as u64, depth: group_count_z as u64, }, - todo!(), - todo!(), + self.state.stage_infos.ts.raw_wg_size, + self.state.stage_infos.ms.raw_wg_size, ); } @@ -1174,11 +1197,20 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn draw_mesh_tasks_indirect( &mut self, - _buffer: &::Buffer, - _offset: wgt::BufferAddress, - _draw_count: u32, + buffer: &::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, ) { - unreachable!() + let encoder = self.state.render.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_mesh_threadgroups_with_indirect_buffer( + &buffer.raw, + offset, + self.state.stage_infos.ts.raw_wg_size, + self.state.stage_infos.ms.raw_wg_size, + ); + offset += size_of::() as wgt::BufferAddress; + } } unsafe fn draw_indirect_count( @@ -1210,7 +1242,7 @@ impl crate::CommandEncoder for super::CommandEncoder { _count_offset: wgt::BufferAddress, _max_count: u32, ) { - unreachable!() + //TODO } // compute @@ -1286,7 +1318,6 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { - self.state.raw_wg_size = pipeline.work_group_size; self.state.stage_infos.cs.assign_from(&pipeline.cs_info); let encoder = self.state.compute.as_ref().unwrap(); @@ -1330,13 +1361,17 @@ impl crate::CommandEncoder for super::CommandEncoder { height: count[1] as u64, depth: count[2] as u64, }; - encoder.dispatch_thread_groups(raw_count, self.state.raw_wg_size); + encoder.dispatch_thread_groups(raw_count, self.state.stage_infos.cs.raw_wg_size); } } unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { let encoder = self.state.compute.as_ref().unwrap(); - encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); + encoder.dispatch_thread_groups_indirect( + &buffer.raw, + offset, + self.state.stage_infos.cs.raw_wg_size, + ); } unsafe fn build_acceleration_structures<'a, T>( diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 97878960a36..6474136f4d7 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -18,6 +18,11 @@ use metal::{ type DeviceResult = Result; +enum MetalGenericRenderPipelineDescriptor { + Standard(metal::RenderPipelineDescriptor), + Mesh(metal::MeshRenderPipelineDescriptor), +} + struct CompiledShader { library: metal::Library, function: metal::Function, @@ -1054,83 +1059,207 @@ impl crate::Device for super::Device { super::PipelineCache, >, ) -> Result { - let (desc_vertex_stage, desc_vertex_buffers) = match &desc.vertex_processor { - crate::VertexProcessor::Standard { - vertex_buffers, - vertex_stage, - } => (vertex_stage, *vertex_buffers), - crate::VertexProcessor::Mesh { .. } => unreachable!(), - }; - objc::rc::autoreleasepool(|| { - let descriptor = metal::RenderPipelineDescriptor::new(); - - let raw_triangle_fill_mode = match desc.primitive.polygon_mode { - wgt::PolygonMode::Fill => MTLTriangleFillMode::Fill, - wgt::PolygonMode::Line => MTLTriangleFillMode::Lines, - wgt::PolygonMode::Point => panic!( - "{:?} is not enabled for this backend", - wgt::Features::POLYGON_MODE_POINT - ), - }; - let (primitive_class, raw_primitive_type) = conv::map_primitive_topology(desc.primitive.topology); - // Vertex shader - let vs_info = { - let mut vertex_buffer_mappings = Vec::::new(); - for (i, vbl) in desc_vertex_buffers.iter().enumerate() { - let mut attributes = Vec::::new(); - for attribute in vbl.attributes.iter() { - attributes.push(naga::back::msl::AttributeMapping { - shader_location: attribute.shader_location, - offset: attribute.offset as u32, - format: convert_vertex_format_to_naga(attribute.format), - }); - } + let vs_info; + let ts_info; + let ms_info; + let descriptor = match desc.vertex_processor { + crate::VertexProcessor::Standard { + vertex_buffers, + ref vertex_stage, + } => { + let descriptor = metal::RenderPipelineDescriptor::new(); + ts_info = None; + ms_info = None; + vs_info = Some({ + let mut vertex_buffer_mappings = + Vec::::new(); + for (i, vbl) in vertex_buffers.iter().enumerate() { + let mut attributes = Vec::::new(); + for attribute in vbl.attributes.iter() { + attributes.push(naga::back::msl::AttributeMapping { + shader_location: attribute.shader_location, + offset: attribute.offset as u32, + format: convert_vertex_format_to_naga(attribute.format), + }); + } + + vertex_buffer_mappings.push(naga::back::msl::VertexBufferMapping { + id: self.shared.private_caps.max_vertex_buffers - 1 - i as u32, + stride: if vbl.array_stride > 0 { + vbl.array_stride.try_into().unwrap() + } else { + vbl.attributes + .iter() + .map(|attribute| attribute.offset + attribute.format.size()) + .max() + .unwrap_or(0) + .try_into() + .unwrap() + }, + indexed_by_vertex: (vbl.step_mode + == wgt::VertexStepMode::Vertex {}), + attributes, + }); + } - vertex_buffer_mappings.push(naga::back::msl::VertexBufferMapping { - id: self.shared.private_caps.max_vertex_buffers - 1 - i as u32, - stride: if vbl.array_stride > 0 { - vbl.array_stride.try_into().unwrap() - } else { - vbl.attributes - .iter() - .map(|attribute| attribute.offset + attribute.format.size()) - .max() - .unwrap_or(0) - .try_into() - .unwrap() - }, - indexed_by_vertex: (vbl.step_mode == wgt::VertexStepMode::Vertex {}), - attributes, + let vs = self.load_shader( + vertex_stage, + &vertex_buffer_mappings, + desc.layout, + primitive_class, + naga::ShaderStage::Vertex, + )?; + + descriptor.set_vertex_function(Some(&vs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.vertex_buffers().unwrap(), + vs.immutable_buffer_mask, + ); + } + + super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.vs, + sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, + sized_bindings: vs.sized_bindings, + vertex_buffer_mappings, + library: Some(vs.library), + raw_wg_size: Default::default(), + } }); - } + if desc.layout.total_counters.vs.buffers + (vertex_buffers.len() as u32) + > self.shared.private_caps.max_vertex_buffers + { + let msg = format!( + "pipeline needs too many buffers in the vertex stage: {} vertex and {} layout", + vertex_buffers.len(), + desc.layout.total_counters.vs.buffers + ); + return Err(crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX, + msg, + )); + } - let vs = self.load_shader( - desc_vertex_stage, - &vertex_buffer_mappings, - desc.layout, - primitive_class, - naga::ShaderStage::Vertex, - )?; - - descriptor.set_vertex_function(Some(&vs.function)); - if self.shared.private_caps.supports_mutability { - Self::set_buffers_mutability( - descriptor.vertex_buffers().unwrap(), - vs.immutable_buffer_mask, - ); - } + if !vertex_buffers.is_empty() { + let vertex_descriptor = metal::VertexDescriptor::new(); + for (i, vb) in vertex_buffers.iter().enumerate() { + let buffer_index = + self.shared.private_caps.max_vertex_buffers as u64 - 1 - i as u64; + let buffer_desc = + vertex_descriptor.layouts().object_at(buffer_index).unwrap(); + + // Metal expects the stride to be the actual size of the attributes. + // The semantics of array_stride == 0 can be achieved by setting + // the step function to constant and rate to 0. + if vb.array_stride == 0 { + let stride = vb + .attributes + .iter() + .map(|attribute| attribute.offset + attribute.format.size()) + .max() + .unwrap_or(0); + buffer_desc.set_stride(wgt::math::align_to(stride, 4)); + buffer_desc.set_step_function(MTLVertexStepFunction::Constant); + buffer_desc.set_step_rate(0); + } else { + buffer_desc.set_stride(vb.array_stride); + buffer_desc.set_step_function(conv::map_step_mode(vb.step_mode)); + } - super::PipelineStageInfo { - push_constants: desc.layout.push_constants_infos.vs, - sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, - sized_bindings: vs.sized_bindings, - vertex_buffer_mappings, - library: Some(vs.library), + for at in vb.attributes { + let attribute_desc = vertex_descriptor + .attributes() + .object_at(at.shader_location as u64) + .unwrap(); + attribute_desc.set_format(conv::map_vertex_format(at.format)); + attribute_desc.set_buffer_index(buffer_index); + attribute_desc.set_offset(at.offset); + } + } + descriptor.set_vertex_descriptor(Some(vertex_descriptor)); + } + todo!() } + crate::VertexProcessor::Mesh { + ref task_stage, + ref mesh_stage, + } => { + vs_info = None; + let descriptor = metal::MeshRenderPipelineDescriptor::new(); + if let Some(ref task_stage) = task_stage { + let ts = self.load_shader( + task_stage, + &[], + desc.layout, + primitive_class, + naga::ShaderStage::Task, + )?; + descriptor.set_mesh_function(Some(&ts.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.mesh_buffers().unwrap(), + ts.immutable_buffer_mask, + ); + } + ts_info = Some(super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.ts, + sizes_slot: desc.layout.per_stage_map.ts.sizes_buffer, + sized_bindings: ts.sized_bindings, + vertex_buffer_mappings: vec![], + library: Some(ts.library), + raw_wg_size: Default::default(), + }); + } else { + ts_info = None; + } + { + let ms = self.load_shader( + mesh_stage, + &[], + desc.layout, + primitive_class, + naga::ShaderStage::Mesh, + )?; + descriptor.set_mesh_function(Some(&ms.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.mesh_buffers().unwrap(), + ms.immutable_buffer_mask, + ); + } + ms_info = Some(super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.ms, + sizes_slot: desc.layout.per_stage_map.ms.sizes_buffer, + sized_bindings: ms.sized_bindings, + vertex_buffer_mappings: vec![], + library: Some(ms.library), + raw_wg_size: Default::default(), + }); + } + MetalGenericRenderPipelineDescriptor::Mesh(descriptor) + } + }; + macro_rules! descriptor_fn { + ($method:ident $( ( $($args:expr),* ) )? ) => { + match descriptor { + MetalGenericRenderPipelineDescriptor::Standard(ref inner) => inner.$method$(($($args),*))?, + MetalGenericRenderPipelineDescriptor::Mesh(ref inner) => inner.$method$(($($args),*))?, + } + }; + } + + let raw_triangle_fill_mode = match desc.primitive.polygon_mode { + wgt::PolygonMode::Fill => MTLTriangleFillMode::Fill, + wgt::PolygonMode::Line => MTLTriangleFillMode::Lines, + wgt::PolygonMode::Point => panic!( + "{:?} is not enabled for this backend", + wgt::Features::POLYGON_MODE_POINT + ), }; // Fragment shader @@ -1144,10 +1273,10 @@ impl crate::Device for super::Device { naga::ShaderStage::Fragment, )?; - descriptor.set_fragment_function(Some(&fs.function)); + descriptor_fn!(set_fragment_function(Some(&fs.function))); if self.shared.private_caps.supports_mutability { Self::set_buffers_mutability( - descriptor.fragment_buffers().unwrap(), + descriptor_fn!(fragment_buffers()).unwrap(), fs.immutable_buffer_mask, ); } @@ -1158,20 +1287,25 @@ impl crate::Device for super::Device { sized_bindings: fs.sized_bindings, vertex_buffer_mappings: vec![], library: Some(fs.library), + raw_wg_size: Default::default(), }) } None => { // TODO: This is a workaround for what appears to be a Metal validation bug // A pixel format is required even though no attachments are provided if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { - descriptor.set_depth_attachment_pixel_format(MTLPixelFormat::Depth32Float); + descriptor_fn!(set_depth_attachment_pixel_format( + MTLPixelFormat::Depth32Float + )); } None } }; for (i, ct) in desc.color_targets.iter().enumerate() { - let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + let at_descriptor = descriptor_fn!(color_attachments()) + .object_at(i as u64) + .unwrap(); let ct = if let Some(color_target) = ct.as_ref() { color_target } else { @@ -1203,10 +1337,10 @@ impl crate::Device for super::Device { let raw_format = self.shared.private_caps.map_format(ds.format); let aspects = crate::FormatAspects::from(ds.format); if aspects.contains(crate::FormatAspects::DEPTH) { - descriptor.set_depth_attachment_pixel_format(raw_format); + descriptor_fn!(set_depth_attachment_pixel_format(raw_format)); } if aspects.contains(crate::FormatAspects::STENCIL) { - descriptor.set_stencil_attachment_pixel_format(raw_format); + descriptor_fn!(set_stencil_attachment_pixel_format(raw_format)); } let ds_descriptor = create_depth_stencil_desc(ds); @@ -1220,90 +1354,61 @@ impl crate::Device for super::Device { None => None, }; - if desc.layout.total_counters.vs.buffers + (desc_vertex_buffers.len() as u32) - > self.shared.private_caps.max_vertex_buffers - { - let msg = format!( - "pipeline needs too many buffers in the vertex stage: {} vertex and {} layout", - desc_vertex_buffers.len(), - desc.layout.total_counters.vs.buffers - ); - return Err(crate::PipelineError::Linkage( - wgt::ShaderStages::VERTEX, - msg, - )); - } - - if !desc_vertex_buffers.is_empty() { - let vertex_descriptor = metal::VertexDescriptor::new(); - for (i, vb) in desc_vertex_buffers.iter().enumerate() { - let buffer_index = - self.shared.private_caps.max_vertex_buffers as u64 - 1 - i as u64; - let buffer_desc = vertex_descriptor.layouts().object_at(buffer_index).unwrap(); - - // Metal expects the stride to be the actual size of the attributes. - // The semantics of array_stride == 0 can be achieved by setting - // the step function to constant and rate to 0. - if vb.array_stride == 0 { - let stride = vb - .attributes - .iter() - .map(|attribute| attribute.offset + attribute.format.size()) - .max() - .unwrap_or(0); - buffer_desc.set_stride(wgt::math::align_to(stride, 4)); - buffer_desc.set_step_function(MTLVertexStepFunction::Constant); - buffer_desc.set_step_rate(0); - } else { - buffer_desc.set_stride(vb.array_stride); - buffer_desc.set_step_function(conv::map_step_mode(vb.step_mode)); + if desc.multisample.count != 1 { + //TODO: handle sample mask + match descriptor { + MetalGenericRenderPipelineDescriptor::Standard(ref inner) => { + inner.set_sample_count(desc.multisample.count as u64); } - - for at in vb.attributes { - let attribute_desc = vertex_descriptor - .attributes() - .object_at(at.shader_location as u64) - .unwrap(); - attribute_desc.set_format(conv::map_vertex_format(at.format)); - attribute_desc.set_buffer_index(buffer_index); - attribute_desc.set_offset(at.offset); + MetalGenericRenderPipelineDescriptor::Mesh(ref inner) => { + inner.set_raster_sample_count(desc.multisample.count as u64); } } - descriptor.set_vertex_descriptor(Some(vertex_descriptor)); - } - - if desc.multisample.count != 1 { - //TODO: handle sample mask - descriptor.set_sample_count(desc.multisample.count as u64); - descriptor - .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled); + descriptor_fn!(set_alpha_to_coverage_enabled( + desc.multisample.alpha_to_coverage_enabled + )); //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); } if let Some(name) = desc.label { - descriptor.set_label(name); + descriptor_fn!(set_label(name)); } - let raw = self - .shared - .device - .lock() - .new_render_pipeline_state(&descriptor) - .map_err(|e| { - crate::PipelineError::Linkage( - wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, - format!("new_render_pipeline_state: {e:?}"), - ) - })?; + let raw = match descriptor { + MetalGenericRenderPipelineDescriptor::Standard(d) => self + .shared + .device + .lock() + .new_render_pipeline_state(&d) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, + format!("new_render_pipeline_state: {e:?}"), + ) + })?, + MetalGenericRenderPipelineDescriptor::Mesh(d) => self + .shared + .device + .lock() + .new_mesh_render_pipeline_state(&d) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::TASK + | wgt::ShaderStages::MESH + | wgt::ShaderStages::FRAGMENT, + format!("new_render_pipeline_state: {e:?}"), + ) + })?, + }; self.counters.render_pipelines.add(1); Ok(super::RenderPipeline { raw, - vs_info: Some(vs_info), + vs_info, fs_info, - ts_info: None, - ms_info: None, + ts_info, + ms_info, raw_primitive_type, raw_triangle_fill_mode, raw_front_winding: conv::map_winding(desc.primitive.front_face), @@ -1376,6 +1481,7 @@ impl crate::Device for super::Device { sizes_slot: desc.layout.per_stage_map.cs.sizes_buffer, sized_bindings: cs.sized_bindings, vertex_buffer_mappings: vec![], + raw_wg_size: cs.wg_size, }; if let Some(name) = desc.label { @@ -1399,7 +1505,6 @@ impl crate::Device for super::Device { Ok(super::ComputePipeline { raw, cs_info, - work_group_size: cs.wg_size, work_group_memory_sizes: cs.wg_memory_sizes, }) }) diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index ec4ae11cdef..a9d9e19b57b 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -836,6 +836,9 @@ struct PipelineStageInfo { /// Info on all bound vertex buffers. vertex_buffer_mappings: Vec, + + /// The workgroup size for compute, task or mesh stages + raw_wg_size: MTLSize, } impl PipelineStageInfo { @@ -881,7 +884,6 @@ impl crate::DynRenderPipeline for RenderPipeline {} pub struct ComputePipeline { raw: metal::ComputePipelineState, cs_info: PipelineStageInfo, - work_group_size: MTLSize, work_group_memory_sizes: Vec, } @@ -956,7 +958,6 @@ struct CommandState { compute: Option, raw_primitive_type: MTLPrimitiveType, index: Option, - raw_wg_size: MTLSize, stage_infos: MultiStageData, /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers. From 3d36680bca124a3d61a7e426553c71a6bdb4eab6 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 00:13:03 -0500 Subject: [PATCH 03/35] Oops --- wgpu-hal/src/metal/device.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 6474136f4d7..4f1154c42c3 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1183,7 +1183,7 @@ impl crate::Device for super::Device { } descriptor.set_vertex_descriptor(Some(vertex_descriptor)); } - todo!() + MetalGenericRenderPipelineDescriptor::Standard(descriptor) } crate::VertexProcessor::Mesh { ref task_stage, From c9c39fd4ab74d7d0516c3e556fb86e9935dde031 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 00:22:57 -0500 Subject: [PATCH 04/35] Another refactor --- wgpu-hal/src/metal/adapter.rs | 5 ++--- wgpu-hal/src/metal/command.rs | 6 ++++-- wgpu-hal/src/metal/device.rs | 11 ++++++----- wgpu-hal/src/metal/mod.rs | 4 +++- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 9517f0b4dd6..d298ee7da15 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -606,8 +606,6 @@ impl super::PrivateCapabilities { } let argument_buffers = device.argument_buffers_support(); - let mesh_shaders = device.supports_family(MTLGPUFamily::Apple7) - || device.supports_family(MTLGPUFamily::Mac2); Self { family_check, @@ -904,7 +902,8 @@ impl super::PrivateCapabilities { && (device.supports_family(MTLGPUFamily::Apple7) || device.supports_family(MTLGPUFamily::Mac2)), supports_shared_event: version.at_least((10, 14), (12, 0), os_is_mac), - mesh_shaders, + mesh_shaders: device.supports_family(MTLGPUFamily::Apple7) + || device.supports_family(MTLGPUFamily::Mac2), } } diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 37beb41a9a3..db282a8d91e 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1335,14 +1335,16 @@ impl crate::CommandEncoder for super::CommandEncoder { } // update the threadgroup memory sizes - while self.state.work_group_memory_sizes.len() < pipeline.work_group_memory_sizes.len() { + while self.state.work_group_memory_sizes.len() + < pipeline.cs_info.work_group_memory_sizes.len() + { self.state.work_group_memory_sizes.push(0); } for (index, (cur_size, pipeline_size)) in self .state .work_group_memory_sizes .iter_mut() - .zip(pipeline.work_group_memory_sizes.iter()) + .zip(pipeline.cs_info.work_group_memory_sizes.iter()) .enumerate() { let size = pipeline_size.next_multiple_of(16); diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 4f1154c42c3..ee1a74b2131 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1129,6 +1129,7 @@ impl crate::Device for super::Device { vertex_buffer_mappings, library: Some(vs.library), raw_wg_size: Default::default(), + work_group_memory_sizes: vec![], } }); if desc.layout.total_counters.vs.buffers + (vertex_buffers.len() as u32) @@ -1213,6 +1214,7 @@ impl crate::Device for super::Device { vertex_buffer_mappings: vec![], library: Some(ts.library), raw_wg_size: Default::default(), + work_group_memory_sizes: vec![], }); } else { ts_info = None; @@ -1239,6 +1241,7 @@ impl crate::Device for super::Device { vertex_buffer_mappings: vec![], library: Some(ms.library), raw_wg_size: Default::default(), + work_group_memory_sizes: vec![], }); } MetalGenericRenderPipelineDescriptor::Mesh(descriptor) @@ -1288,6 +1291,7 @@ impl crate::Device for super::Device { vertex_buffer_mappings: vec![], library: Some(fs.library), raw_wg_size: Default::default(), + work_group_memory_sizes: vec![], }) } None => { @@ -1482,6 +1486,7 @@ impl crate::Device for super::Device { sized_bindings: cs.sized_bindings, vertex_buffer_mappings: vec![], raw_wg_size: cs.wg_size, + work_group_memory_sizes: cs.wg_memory_sizes, }; if let Some(name) = desc.label { @@ -1502,11 +1507,7 @@ impl crate::Device for super::Device { self.counters.compute_pipelines.add(1); - Ok(super::ComputePipeline { - raw, - cs_info, - work_group_memory_sizes: cs.wg_memory_sizes, - }) + Ok(super::ComputePipeline { raw, cs_info }) }) } diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index a9d9e19b57b..c2d2a80a214 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -839,6 +839,9 @@ struct PipelineStageInfo { /// The workgroup size for compute, task or mesh stages raw_wg_size: MTLSize, + + /// The workgroup memory sizes for compute task or mesh stages + work_group_memory_sizes: Vec, } impl PipelineStageInfo { @@ -884,7 +887,6 @@ impl crate::DynRenderPipeline for RenderPipeline {} pub struct ComputePipeline { raw: metal::ComputePipelineState, cs_info: PipelineStageInfo, - work_group_memory_sizes: Vec, } unsafe impl Send for ComputePipeline {} From fb330288f734898ac0e6a000ba32e4f3d23e3b4b Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 01:49:43 -0500 Subject: [PATCH 05/35] Another slight refactor --- wgpu-hal/src/metal/command.rs | 8 ++++---- wgpu-hal/src/metal/mod.rs | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index db282a8d91e..a91035b642f 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -24,7 +24,6 @@ impl Default for super::CommandState { stage_infos: Default::default(), storage_buffer_length_map: Default::default(), vertex_buffer_size_map: Default::default(), - work_group_memory_sizes: Vec::new(), push_constants: Vec::new(), pending_timer_queries: Vec::new(), } @@ -149,7 +148,6 @@ impl super::CommandState { self.stage_infos.vs.clear(); self.stage_infos.fs.clear(); self.stage_infos.cs.clear(); - self.work_group_memory_sizes.clear(); self.push_constants.clear(); } @@ -1335,13 +1333,15 @@ impl crate::CommandEncoder for super::CommandEncoder { } // update the threadgroup memory sizes - while self.state.work_group_memory_sizes.len() + while self.state.stage_infos.cs.work_group_memory_sizes.len() < pipeline.cs_info.work_group_memory_sizes.len() { - self.state.work_group_memory_sizes.push(0); + self.state.stage_infos.cs.work_group_memory_sizes.push(0); } for (index, (cur_size, pipeline_size)) in self .state + .stage_infos + .cs .work_group_memory_sizes .iter_mut() .zip(pipeline.cs_info.work_group_memory_sizes.iter()) diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index c2d2a80a214..c4d9992e7db 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -850,6 +850,9 @@ impl PipelineStageInfo { self.sizes_slot = None; self.sized_bindings.clear(); self.vertex_buffer_mappings.clear(); + self.library = None; + self.work_group_memory_sizes.clear(); + self.raw_wg_size = Default::default(); } fn assign_from(&mut self, other: &Self) { @@ -985,7 +988,6 @@ struct CommandState { vertex_buffer_size_map: FastHashMap, - work_group_memory_sizes: Vec, push_constants: Vec, /// Timer query that should be executed when the next pass starts. From ece1ea10c1c9c3cebbc2db4ec1742e5aac1eb289 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 01:58:22 -0500 Subject: [PATCH 06/35] Another slight refactor --- wgpu-hal/src/metal/command.rs | 2 ++ wgpu-hal/src/metal/mod.rs | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index a91035b642f..a83540a9a37 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -148,6 +148,8 @@ impl super::CommandState { self.stage_infos.vs.clear(); self.stage_infos.fs.clear(); self.stage_infos.cs.clear(); + self.stage_infos.ts.clear(); + self.stage_infos.ms.clear(); self.push_constants.clear(); } diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index c4d9992e7db..1e7b5281240 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -863,6 +863,11 @@ impl PipelineStageInfo { self.vertex_buffer_mappings.clear(); self.vertex_buffer_mappings .extend_from_slice(&other.vertex_buffer_mappings); + self.library = Some(other.library.as_ref().unwrap().clone()); + self.raw_wg_size = other.raw_wg_size; + self.work_group_memory_sizes.clear(); + self.work_group_memory_sizes + .extend_from_slice(&other.work_group_memory_sizes); } } From 47c187b40ed14a08112ad0221dff9295c6190259 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 01:59:49 -0500 Subject: [PATCH 07/35] Fixed it --- wgpu-hal/src/metal/command.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index a83540a9a37..542287983e9 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -915,11 +915,11 @@ impl crate::CommandEncoder for super::CommandEncoder { } match pipeline.ts_info { Some(ref info) => self.state.stage_infos.ts.assign_from(info), - None => self.state.stage_infos.vs.clear(), + None => self.state.stage_infos.ts.clear(), } match pipeline.ms_info { Some(ref info) => self.state.stage_infos.ms.assign_from(info), - None => self.state.stage_infos.fs.clear(), + None => self.state.stage_infos.ms.clear(), } let encoder = self.state.render.as_ref().unwrap(); From 8bc63b662a542971d6b8f3c19284e35fc3417592 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 02:33:22 -0500 Subject: [PATCH 08/35] Worked a little more on trying to add it to example --- examples/features/src/mesh_shader/mod.rs | 32 ++++++-- .../features/src/mesh_shader/shader.metal | 74 +++++++++++++++++++ wgpu-types/src/lib.rs | 4 +- 3 files changed, 102 insertions(+), 8 deletions(-) create mode 100644 examples/features/src/mesh_shader/shader.metal diff --git a/examples/features/src/mesh_shader/mod.rs b/examples/features/src/mesh_shader/mod.rs index 675150f5106..e21e7ae2c95 100644 --- a/examples/features/src/mesh_shader/mod.rs +++ b/examples/features/src/mesh_shader/mod.rs @@ -33,13 +33,25 @@ fn compile_glsl( } } +fn compile_msl(device: &wgpu::Device, entry: &str) -> wgpu::ShaderModule { + unsafe { + device.create_shader_module_passthrough(wgpu::ShaderModuleDescriptorPassthrough { + entry_point: entry.to_owned(), + label: None, + msl: Some(std::borrow::Cow::Borrowed(include_str!("shader.metal"))), + num_workgroups: (1, 1, 1), + ..Default::default() + }) + } +} + pub struct Example { pipeline: wgpu::RenderPipeline, } impl crate::framework::Example for Example { fn init( config: &wgpu::SurfaceConfiguration, - _adapter: &wgpu::Adapter, + adapter: &wgpu::Adapter, device: &wgpu::Device, _queue: &wgpu::Queue, ) -> Self { @@ -48,11 +60,19 @@ impl crate::framework::Example for Example { bind_group_layouts: &[], push_constant_ranges: &[], }); - let (ts, ms, fs) = ( - compile_glsl(device, include_bytes!("shader.task"), "task"), - compile_glsl(device, include_bytes!("shader.mesh"), "mesh"), - compile_glsl(device, include_bytes!("shader.frag"), "frag"), - ); + let (ts, ms, fs) = if adapter.get_info().backend == wgpu::Backend::Metal { + ( + compile_msl(device, "taskShader"), + compile_msl(device, "meshShader"), + compile_msl(device, "fragShader"), + ) + } else { + ( + compile_glsl(device, include_bytes!("shader.task"), "task"), + compile_glsl(device, include_bytes!("shader.mesh"), "mesh"), + compile_glsl(device, include_bytes!("shader.frag"), "frag"), + ) + }; let pipeline = device.create_mesh_pipeline(&wgpu::MeshPipelineDescriptor { label: None, layout: Some(&pipeline_layout), diff --git a/examples/features/src/mesh_shader/shader.metal b/examples/features/src/mesh_shader/shader.metal new file mode 100644 index 00000000000..0a563132a19 --- /dev/null +++ b/examples/features/src/mesh_shader/shader.metal @@ -0,0 +1,74 @@ +using namespace metal; + +struct OutVertex { + float4 Position [[position]]; + float4 Color; +}; + +struct OutPrimitive { + float4 ColorMask [[flat]]; + bool CullPrimitive; +}; + +struct InVertex { + float4 Color; +}; + +struct InPrimitive { + float4 ColorMask [[flat]]; +}; + +struct PayloadData { + float4 ColorMask; + bool Visible; +}; + +using Meshlet = metal::mesh; + + +constant float4 positions[3] = { + float4(0.0, 1.0, 0.0, 1.0), + float4(-1.0, -1.0, 0.0, 1.0), + float4(1.0, -1.0, 0.0, 1.0) +}; + +constant float4 colors[3] = { + float4(0.0, 1.0, 0.0, 1.0), + float4(0.0, 0.0, 1.0, 1.0), + float4(1.0, 0.0, 0.0, 1.0) +}; + + +[[object]] +void taskShader(uint3 tid [[thread_position_in_grid]], object_data PayloadData &outPayload [[payload]], grid_properties grid) { + outPayload.ColorMask = float4(1.0, 1.0, 0.0, 1.0); + outPayload.Visible = true; + grid.set_threadgroups_per_grid(uint3(3, 1, 1)); +} + +[[mesh, topology(triangle)]] +void meshShader( + object_data PayloadData const& payload [[payload]], + Meshlet out, +) +{ + out.set_primitive_count(1); + + for(int i = 0;i < 3;i++) { + OutVertex vert; + vert.Position = positions[i]; + vert.Color = colors[i] * payload.ColorMask; + mesh.set_vertex(i, vert); + out.set_index(i, i); + } + + triangles[0] = uint3(0, 1, 2); + OutPrimitive prim; + prim.ColorMask = float4(1.0, 0.0, 0.0, 1.0); + prim.CullPrimitive = !payload.Visible; + out.set_primitive(0, prim); +} + +fragment float4 fragShader(OutVertex inVertex [[stage_in]], OutPrimitive inPrimitive [[stage_in]]) { + return inVertex.Color * inPrimitive.ColorMask; +} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index ea2a09eb62a..828136a690c 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -979,8 +979,8 @@ impl Limits { // Literally just made this up as 256^2 or 2^16. // My GPU supports 2^22, and compute shaders don't have this kind of limit. // This very likely is never a real limiter - max_task_workgroup_total_count: 65536, - max_task_workgroups_per_dimension: 256, + max_task_workgroup_total_count: 1024, + max_task_workgroups_per_dimension: 1024, // llvmpipe reports 0 multiview count, which just means no multiview is allowed max_mesh_multiview_count: 0, // llvmpipe once again requires this to be 8. An RTX 3060 supports well over 1024. From 55d6bf3b3ab94c3ea16d09856bc393d98a9b67ed Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 02:40:34 -0500 Subject: [PATCH 09/35] Fixed metal shader --- examples/features/src/mesh_shader/shader.metal | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/examples/features/src/mesh_shader/shader.metal b/examples/features/src/mesh_shader/shader.metal index 0a563132a19..65edc83e442 100644 --- a/examples/features/src/mesh_shader/shader.metal +++ b/examples/features/src/mesh_shader/shader.metal @@ -18,6 +18,11 @@ struct InPrimitive { float4 ColorMask [[flat]]; }; +struct FragmentIn { + InVertex vert; + InPrimitive prim; +}; + struct PayloadData { float4 ColorMask; bool Visible; @@ -40,16 +45,16 @@ constant float4 colors[3] = { [[object]] -void taskShader(uint3 tid [[thread_position_in_grid]], object_data PayloadData &outPayload [[payload]], grid_properties grid) { +void taskShader(uint3 tid [[thread_position_in_grid]], object_data PayloadData &outPayload [[payload]], mesh_grid_properties grid) { outPayload.ColorMask = float4(1.0, 1.0, 0.0, 1.0); outPayload.Visible = true; grid.set_threadgroups_per_grid(uint3(3, 1, 1)); } -[[mesh, topology(triangle)]] +[[mesh]] void meshShader( object_data PayloadData const& payload [[payload]], - Meshlet out, + Meshlet out ) { out.set_primitive_count(1); @@ -58,17 +63,16 @@ void meshShader( OutVertex vert; vert.Position = positions[i]; vert.Color = colors[i] * payload.ColorMask; - mesh.set_vertex(i, vert); + out.set_vertex(i, vert); out.set_index(i, i); } - triangles[0] = uint3(0, 1, 2); OutPrimitive prim; prim.ColorMask = float4(1.0, 0.0, 0.0, 1.0); prim.CullPrimitive = !payload.Visible; out.set_primitive(0, prim); } -fragment float4 fragShader(OutVertex inVertex [[stage_in]], OutPrimitive inPrimitive [[stage_in]]) { - return inVertex.Color * inPrimitive.ColorMask; +fragment float4 fragShader(FragmentIn data [[stage_in]]) { + return data.vert.Color * data.prim.ColorMask; } From edfd494cbd71ffa8b9b2dd82583e91a9e5c68a18 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 02:56:17 -0500 Subject: [PATCH 10/35] Fixed some passthrough stuff, now it runs (uggh) --- wgpu-hal/src/metal/device.rs | 326 ++++++++++++++++++----------------- wgpu-hal/src/metal/mod.rs | 3 +- 2 files changed, 174 insertions(+), 155 deletions(-) diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index ee1a74b2131..3a48c9e8ead 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -133,176 +133,194 @@ impl super::Device { primitive_class: MTLPrimitiveTopologyClass, naga_stage: naga::ShaderStage, ) -> Result { - let naga_shader = if let ShaderModuleSource::Naga(naga) = &stage.module.source { - naga - } else { - panic!("load_shader required a naga shader"); - }; - let stage_bit = map_naga_stage(naga_stage); - let (module, module_info) = naga::back::pipeline_constants::process_overrides( - &naga_shader.module, - &naga_shader.info, - Some((naga_stage, stage.entry_point)), - stage.constants, - ) - .map_err(|e| crate::PipelineError::PipelineConstants(stage_bit, format!("MSL: {e:?}")))?; - - let ep_resources = &layout.per_stage_map[naga_stage]; - - let bounds_check_policy = if stage.module.bounds_checks.bounds_checks { - naga::proc::BoundsCheckPolicy::Restrict - } else { - naga::proc::BoundsCheckPolicy::Unchecked - }; + match stage.module.source { + ShaderModuleSource::Naga(ref naga_shader) => { + let stage_bit = map_naga_stage(naga_stage); + let (module, module_info) = naga::back::pipeline_constants::process_overrides( + &naga_shader.module, + &naga_shader.info, + Some((naga_stage, stage.entry_point)), + stage.constants, + ) + .map_err(|e| { + crate::PipelineError::PipelineConstants(stage_bit, format!("MSL: {e:?}")) + })?; - let options = naga::back::msl::Options { - lang_version: match self.shared.private_caps.msl_version { - MTLLanguageVersion::V1_0 => (1, 0), - MTLLanguageVersion::V1_1 => (1, 1), - MTLLanguageVersion::V1_2 => (1, 2), - MTLLanguageVersion::V2_0 => (2, 0), - MTLLanguageVersion::V2_1 => (2, 1), - MTLLanguageVersion::V2_2 => (2, 2), - MTLLanguageVersion::V2_3 => (2, 3), - MTLLanguageVersion::V2_4 => (2, 4), - MTLLanguageVersion::V3_0 => (3, 0), - MTLLanguageVersion::V3_1 => (3, 1), - }, - inline_samplers: Default::default(), - spirv_cross_compatibility: false, - fake_missing_bindings: false, - per_entry_point_map: naga::back::msl::EntryPointResourceMap::from([( - stage.entry_point.to_owned(), - ep_resources.clone(), - )]), - bounds_check_policies: naga::proc::BoundsCheckPolicies { - index: bounds_check_policy, - buffer: bounds_check_policy, - image_load: bounds_check_policy, - // TODO: support bounds checks on binding arrays - binding_array: naga::proc::BoundsCheckPolicy::Unchecked, - }, - zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory, - force_loop_bounding: stage.module.bounds_checks.force_loop_bounding, - }; + let ep_resources = &layout.per_stage_map[naga_stage]; - let pipeline_options = naga::back::msl::PipelineOptions { - entry_point: Some((naga_stage, stage.entry_point.to_owned())), - allow_and_force_point_size: match primitive_class { - MTLPrimitiveTopologyClass::Point => true, - _ => false, - }, - vertex_pulling_transform: true, - vertex_buffer_mappings: vertex_buffer_mappings.to_vec(), - }; + let bounds_check_policy = if stage.module.bounds_checks.bounds_checks { + naga::proc::BoundsCheckPolicy::Restrict + } else { + naga::proc::BoundsCheckPolicy::Unchecked + }; - let (source, info) = - naga::back::msl::write_string(&module, &module_info, &options, &pipeline_options) - .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {e:?}")))?; + let options = naga::back::msl::Options { + lang_version: match self.shared.private_caps.msl_version { + MTLLanguageVersion::V1_0 => (1, 0), + MTLLanguageVersion::V1_1 => (1, 1), + MTLLanguageVersion::V1_2 => (1, 2), + MTLLanguageVersion::V2_0 => (2, 0), + MTLLanguageVersion::V2_1 => (2, 1), + MTLLanguageVersion::V2_2 => (2, 2), + MTLLanguageVersion::V2_3 => (2, 3), + MTLLanguageVersion::V2_4 => (2, 4), + MTLLanguageVersion::V3_0 => (3, 0), + MTLLanguageVersion::V3_1 => (3, 1), + }, + inline_samplers: Default::default(), + spirv_cross_compatibility: false, + fake_missing_bindings: false, + per_entry_point_map: naga::back::msl::EntryPointResourceMap::from([( + stage.entry_point.to_owned(), + ep_resources.clone(), + )]), + bounds_check_policies: naga::proc::BoundsCheckPolicies { + index: bounds_check_policy, + buffer: bounds_check_policy, + image_load: bounds_check_policy, + // TODO: support bounds checks on binding arrays + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }, + zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory, + force_loop_bounding: stage.module.bounds_checks.force_loop_bounding, + }; - log::debug!( - "Naga generated shader for entry point '{}' and stage {:?}\n{}", - stage.entry_point, - naga_stage, - &source - ); + let pipeline_options = naga::back::msl::PipelineOptions { + entry_point: Some((naga_stage, stage.entry_point.to_owned())), + allow_and_force_point_size: match primitive_class { + MTLPrimitiveTopologyClass::Point => true, + _ => false, + }, + vertex_pulling_transform: true, + vertex_buffer_mappings: vertex_buffer_mappings.to_vec(), + }; - let options = metal::CompileOptions::new(); - options.set_language_version(self.shared.private_caps.msl_version); + let (source, info) = naga::back::msl::write_string( + &module, + &module_info, + &options, + &pipeline_options, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {e:?}")))?; - if self.shared.private_caps.supports_preserve_invariance { - options.set_preserve_invariance(true); - } + log::debug!( + "Naga generated shader for entry point '{}' and stage {:?}\n{}", + stage.entry_point, + naga_stage, + &source + ); - let library = self - .shared - .device - .lock() - .new_library_with_source(source.as_ref(), &options) - .map_err(|err| { - log::warn!("Naga generated shader:\n{source}"); - crate::PipelineError::Linkage(stage_bit, format!("Metal: {err}")) - })?; - - let ep_index = module - .entry_points - .iter() - .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) - .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; - let ep = &module.entry_points[ep_index]; - let translated_ep_name = info.entry_point_names[0] - .as_ref() - .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{e}")))?; - - let wg_size = MTLSize { - width: ep.workgroup_size[0] as _, - height: ep.workgroup_size[1] as _, - depth: ep.workgroup_size[2] as _, - }; + let options = metal::CompileOptions::new(); + options.set_language_version(self.shared.private_caps.msl_version); - let function = library - .get_function(translated_ep_name, None) - .map_err(|e| { - log::error!("get_function: {e:?}"); - crate::PipelineError::EntryPoint(naga_stage) - })?; - - // collect sizes indices, immutable buffers, and work group memory sizes - let ep_info = &module_info.get_entry_point(ep_index); - let mut wg_memory_sizes = Vec::new(); - let mut sized_bindings = Vec::new(); - let mut immutable_buffer_mask = 0; - for (var_handle, var) in module.global_variables.iter() { - match var.space { - naga::AddressSpace::WorkGroup => { - if !ep_info[var_handle].is_empty() { - let size = module.types[var.ty].inner.size(module.to_ctx()); - wg_memory_sizes.push(size); - } + if self.shared.private_caps.supports_preserve_invariance { + options.set_preserve_invariance(true); } - naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. } => { - let br = match var.binding { - Some(br) => br, - None => continue, - }; - let storage_access_store = match var.space { - naga::AddressSpace::Storage { access } => { - access.contains(naga::StorageAccess::STORE) + + let library = self + .shared + .device + .lock() + .new_library_with_source(source.as_ref(), &options) + .map_err(|err| { + log::warn!("Naga generated shader:\n{source}"); + crate::PipelineError::Linkage(stage_bit, format!("Metal: {err}")) + })?; + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + let ep = &module.entry_points[ep_index]; + let translated_ep_name = info.entry_point_names[0] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{e}")))?; + + let wg_size = MTLSize { + width: ep.workgroup_size[0] as _, + height: ep.workgroup_size[1] as _, + depth: ep.workgroup_size[2] as _, + }; + + let function = library + .get_function(translated_ep_name, None) + .map_err(|e| { + log::error!("get_function: {e:?}"); + crate::PipelineError::EntryPoint(naga_stage) + })?; + + // collect sizes indices, immutable buffers, and work group memory sizes + let ep_info = &module_info.get_entry_point(ep_index); + let mut wg_memory_sizes = Vec::new(); + let mut sized_bindings = Vec::new(); + let mut immutable_buffer_mask = 0; + for (var_handle, var) in module.global_variables.iter() { + match var.space { + naga::AddressSpace::WorkGroup => { + if !ep_info[var_handle].is_empty() { + let size = module.types[var.ty].inner.size(module.to_ctx()); + wg_memory_sizes.push(size); + } } - _ => false, - }; + naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. } => { + let br = match var.binding { + Some(br) => br, + None => continue, + }; + let storage_access_store = match var.space { + naga::AddressSpace::Storage { access } => { + access.contains(naga::StorageAccess::STORE) + } + _ => false, + }; - // check for an immutable buffer - if !ep_info[var_handle].is_empty() && !storage_access_store { - let slot = ep_resources.resources[&br].buffer.unwrap(); - immutable_buffer_mask |= 1 << slot; - } + // check for an immutable buffer + if !ep_info[var_handle].is_empty() && !storage_access_store { + let slot = ep_resources.resources[&br].buffer.unwrap(); + immutable_buffer_mask |= 1 << slot; + } - let mut dynamic_array_container_ty = var.ty; - if let naga::TypeInner::Struct { ref members, .. } = module.types[var.ty].inner - { - dynamic_array_container_ty = members.last().unwrap().ty; - } - if let naga::TypeInner::Array { - size: naga::ArraySize::Dynamic, - .. - } = module.types[dynamic_array_container_ty].inner - { - sized_bindings.push(br); + let mut dynamic_array_container_ty = var.ty; + if let naga::TypeInner::Struct { ref members, .. } = + module.types[var.ty].inner + { + dynamic_array_container_ty = members.last().unwrap().ty; + } + if let naga::TypeInner::Array { + size: naga::ArraySize::Dynamic, + .. + } = module.types[dynamic_array_container_ty].inner + { + sized_bindings.push(br); + } + } + _ => {} } } - _ => {} + + Ok(CompiledShader { + library, + function, + wg_size, + wg_memory_sizes, + sized_bindings, + immutable_buffer_mask, + }) } + ShaderModuleSource::Passthrough(ref shader) => Ok(CompiledShader { + library: shader.library.clone(), + function: shader.function.clone(), + wg_size: MTLSize { + width: shader.num_workgroups.0 as u64, + height: shader.num_workgroups.1 as u64, + depth: shader.num_workgroups.2 as u64, + }, + wg_memory_sizes: vec![], + sized_bindings: vec![], + immutable_buffer_mask: 0, + }), } - - Ok(CompiledShader { - library, - function, - wg_size, - wg_memory_sizes, - sized_bindings, - immutable_buffer_mask, - }) } fn set_buffers_mutability( diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 1e7b5281240..fda7e001906 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -624,7 +624,8 @@ impl ops::Index for MultiStageData { naga::ShaderStage::Vertex => &self.vs, naga::ShaderStage::Fragment => &self.fs, naga::ShaderStage::Compute => &self.cs, - naga::ShaderStage::Task | naga::ShaderStage::Mesh => unreachable!(), + naga::ShaderStage::Task => &self.ts, + naga::ShaderStage::Mesh => &self.ms, } } } From d4725b1f14cd6f8c6ba00068884e0dea22a71343 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 13:54:15 -0500 Subject: [PATCH 11/35] Small update to test shader (still blank screen) --- examples/features/src/mesh_shader/shader.metal | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/features/src/mesh_shader/shader.metal b/examples/features/src/mesh_shader/shader.metal index 65edc83e442..5c99fffc231 100644 --- a/examples/features/src/mesh_shader/shader.metal +++ b/examples/features/src/mesh_shader/shader.metal @@ -2,20 +2,20 @@ using namespace metal; struct OutVertex { float4 Position [[position]]; - float4 Color; + float4 Color [[user(locn0)]]; }; struct OutPrimitive { - float4 ColorMask [[flat]]; - bool CullPrimitive; + float4 ColorMask [[flat]] [[user(locn1)]]; + bool CullPrimitive [[primitive_culled]]; }; struct InVertex { - float4 Color; + float4 Color [[user(locn0)]]; }; struct InPrimitive { - float4 ColorMask [[flat]]; + float4 ColorMask [[flat]] [[user(locn1)]]; }; struct FragmentIn { From 7efae60bd9138c37f095c946fd4b349b0f454eb6 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 13:56:20 -0500 Subject: [PATCH 12/35] Another quick update to the shader --- examples/features/src/mesh_shader/shader.metal | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/features/src/mesh_shader/shader.metal b/examples/features/src/mesh_shader/shader.metal index 5c99fffc231..4c7da503832 100644 --- a/examples/features/src/mesh_shader/shader.metal +++ b/examples/features/src/mesh_shader/shader.metal @@ -11,7 +11,6 @@ struct OutPrimitive { }; struct InVertex { - float4 Color [[user(locn0)]]; }; struct InPrimitive { @@ -19,8 +18,8 @@ struct InPrimitive { }; struct FragmentIn { - InVertex vert; - InPrimitive prim; + float4 Color [[user(locn0)]]; + float4 ColorMask [[flat]] [[user(locn1)]]; }; struct PayloadData { @@ -74,5 +73,5 @@ void meshShader( } fragment float4 fragShader(FragmentIn data [[stage_in]]) { - return data.vert.Color * data.prim.ColorMask; + return data.Color * data.ColorMask; } From bd79d513438e30f1fe845490ddda835f6f9f46ef Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 13:59:38 -0500 Subject: [PATCH 13/35] Made mesh shader tests get skipped on metal due to not having MSL passthrough yet --- tests/tests/wgpu-gpu/mesh_shader/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/tests/wgpu-gpu/mesh_shader/mod.rs b/tests/tests/wgpu-gpu/mesh_shader/mod.rs index 4dd897129f6..ae705c92341 100644 --- a/tests/tests/wgpu-gpu/mesh_shader/mod.rs +++ b/tests/tests/wgpu-gpu/mesh_shader/mod.rs @@ -86,6 +86,9 @@ fn mesh_pipeline_build( frag: Option<&[u8]>, draw: bool, ) { + if ctx.adapter.get_info().backend != wgpu::Backend::Vulkan { + return; + } let device = &ctx.device; let (_depth_image, depth_view, depth_state) = create_depth(device); let task = task.map(|t| compile_glsl(device, t, "task")); @@ -160,6 +163,9 @@ pub enum DrawType { } fn mesh_draw(ctx: &TestingContext, draw_type: DrawType) { + if ctx.adapter.get_info().backend != wgpu::Backend::Vulkan { + return; + } let device = &ctx.device; let (_depth_image, depth_view, depth_state) = create_depth(device); let task = compile_glsl(device, BASIC_TASK, "task"); From 760de4b59e007cbde0d0d80e761d7ea839b7b476 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 14:33:19 -0500 Subject: [PATCH 14/35] Add changelog entry --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11fb072dcab..2f679924d06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,6 +110,9 @@ This allows using precompiled shaders without manually checking which backend's - Allow disabling waiting for latency waitable object. By @marcpabst in [#7400](https://github.com/gfx-rs/wgpu/pull/7400) +#### Metal +- Add support for mesh shaders. By @SupaMaggie70Incorporated in [#8139](https://github.com/gfx-rs/wgpu/pull/8139) + ### Bug Fixes #### General From 3f56df6b4842a14a79301ec8eeeeac309deac01b Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 15:10:47 -0500 Subject: [PATCH 15/35] Made some stuff more generic (bind groups & push constants) --- wgpu-hal/src/metal/command.rs | 300 +++++++++++++++++----------------- 1 file changed, 148 insertions(+), 152 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 542287983e9..1e4ac8d2419 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -672,168 +672,150 @@ impl crate::CommandEncoder for super::CommandEncoder { dynamic_offsets: &[wgt::DynamicOffset], ) { let bg_info = &layout.bind_group_infos[group_index as usize]; - - if let Some(ref encoder) = self.state.render { - let mut changes_sizes_buffer = false; - for index in 0..group.counters.vs.buffers { - let buf = &group.buffers[index as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + let render_encoder = self.state.render.clone(); + let compute_encoder = self.state.compute.clone(); + let mut update_stage = + |stage: naga::ShaderStage, + render_encoder: Option<&metal::RenderCommandEncoder>, + compute_encoder: Option<&metal::ComputeCommandEncoder>| { + let buffers = match stage { + naga::ShaderStage::Vertex => group.counters.vs.buffers, + naga::ShaderStage::Fragment => group.counters.fs.buffers, + naga::ShaderStage::Task => group.counters.ts.buffers, + naga::ShaderStage::Mesh => group.counters.ms.buffers, + naga::ShaderStage::Compute => group.counters.cs.buffers, + }; + let mut changes_sizes_buffer = false; + for index in 0..buffers { + let buf = &group.buffers[index as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + let a1 = (bg_info.base_resource_indices.vs.buffers + index) as u64; + let a2 = Some(buf.ptr.as_native()); + let a3 = offset; + match stage { + naga::ShaderStage::Vertex => { + render_encoder.unwrap().set_vertex_buffer(a1, a2, a3) + } + naga::ShaderStage::Fragment => { + render_encoder.unwrap().set_fragment_buffer(a1, a2, a3) + } + naga::ShaderStage::Task => { + render_encoder.unwrap().set_object_buffer(a1, a2, a3) + } + naga::ShaderStage::Mesh => { + render_encoder.unwrap().set_mesh_buffer(a1, a2, a3) + } + naga::ShaderStage::Compute => { + compute_encoder.unwrap().set_buffer(a1, a2, a3) + } + } + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } } - encoder.set_vertex_buffer( - (bg_info.base_resource_indices.vs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; + if changes_sizes_buffer { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(stage, &mut self.temp.binding_sizes) + { + let a1 = index as _; + let a2 = (sizes.len() * WORD_SIZE) as u64; + let a3 = sizes.as_ptr().cast(); + match stage { + naga::ShaderStage::Vertex => { + render_encoder.unwrap().set_vertex_bytes(a1, a2, a3) + } + naga::ShaderStage::Fragment => { + render_encoder.unwrap().set_fragment_bytes(a1, a2, a3) + } + naga::ShaderStage::Task => { + render_encoder.unwrap().set_object_bytes(a1, a2, a3) + } + naga::ShaderStage::Mesh => { + render_encoder.unwrap().set_mesh_bytes(a1, a2, a3) + } + naga::ShaderStage::Compute => { + compute_encoder.unwrap().set_bytes(a1, a2, a3) + } + } + } } - } - if changes_sizes_buffer { - if let Some((index, sizes)) = self.state.make_sizes_buffer_update( - naga::ShaderStage::Vertex, - &mut self.temp.binding_sizes, - ) { - encoder.set_vertex_bytes( - index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), - ); + let samplers = match stage { + naga::ShaderStage::Vertex => group.counters.vs.samplers, + naga::ShaderStage::Fragment => group.counters.fs.samplers, + naga::ShaderStage::Task => group.counters.ts.samplers, + naga::ShaderStage::Mesh => group.counters.ms.samplers, + naga::ShaderStage::Compute => group.counters.cs.samplers, + }; + for index in 0..samplers { + let res = group.samplers[(group.counters.vs.samplers + index) as usize]; + let a1 = (bg_info.base_resource_indices.fs.samplers + index) as u64; + let a2 = Some(res.as_native()); + match stage { + naga::ShaderStage::Vertex => { + render_encoder.unwrap().set_vertex_sampler_state(a1, a2) + } + naga::ShaderStage::Fragment => { + render_encoder.unwrap().set_fragment_sampler_state(a1, a2) + } + naga::ShaderStage::Task => { + render_encoder.unwrap().set_object_sampler_state(a1, a2) + } + naga::ShaderStage::Mesh => { + render_encoder.unwrap().set_mesh_sampler_state(a1, a2) + } + naga::ShaderStage::Compute => { + compute_encoder.unwrap().set_sampler_state(a1, a2) + } + } } - } - changes_sizes_buffer = false; - for index in 0..group.counters.fs.buffers { - let buf = &group.buffers[(group.counters.vs.buffers + index) as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - encoder.set_fragment_buffer( - (bg_info.base_resource_indices.fs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; - } - } - if changes_sizes_buffer { - if let Some((index, sizes)) = self.state.make_sizes_buffer_update( - naga::ShaderStage::Fragment, - &mut self.temp.binding_sizes, - ) { - encoder.set_fragment_bytes( - index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), - ); + let textures = match stage { + naga::ShaderStage::Vertex => group.counters.vs.textures, + naga::ShaderStage::Fragment => group.counters.fs.textures, + naga::ShaderStage::Task => group.counters.ts.textures, + naga::ShaderStage::Mesh => group.counters.ms.textures, + naga::ShaderStage::Compute => group.counters.cs.textures, + }; + for index in 0..textures { + let res = group.textures[index as usize]; + let a1 = (bg_info.base_resource_indices.vs.textures + index) as u64; + let a2 = Some(res.as_native()); + match stage { + naga::ShaderStage::Vertex => { + render_encoder.unwrap().set_vertex_texture(a1, a2) + } + naga::ShaderStage::Fragment => { + render_encoder.unwrap().set_fragment_texture(a1, a2) + } + naga::ShaderStage::Task => { + render_encoder.unwrap().set_object_texture(a1, a2) + } + naga::ShaderStage::Mesh => render_encoder.unwrap().set_mesh_texture(a1, a2), + naga::ShaderStage::Compute => compute_encoder.unwrap().set_texture(a1, a2), + } } - } - - for index in 0..group.counters.vs.samplers { - let res = group.samplers[index as usize]; - encoder.set_vertex_sampler_state( - (bg_info.base_resource_indices.vs.samplers + index) as u64, - Some(res.as_native()), - ); - } - for index in 0..group.counters.fs.samplers { - let res = group.samplers[(group.counters.vs.samplers + index) as usize]; - encoder.set_fragment_sampler_state( - (bg_info.base_resource_indices.fs.samplers + index) as u64, - Some(res.as_native()), - ); - } - - for index in 0..group.counters.vs.textures { - let res = group.textures[index as usize]; - encoder.set_vertex_texture( - (bg_info.base_resource_indices.vs.textures + index) as u64, - Some(res.as_native()), - ); - } - for index in 0..group.counters.fs.textures { - let res = group.textures[(group.counters.vs.textures + index) as usize]; - encoder.set_fragment_texture( - (bg_info.base_resource_indices.fs.textures + index) as u64, - Some(res.as_native()), - ); - } - + }; + if let Some(encoder) = render_encoder { + update_stage(naga::ShaderStage::Vertex, Some(&encoder), None); + update_stage(naga::ShaderStage::Fragment, Some(&encoder), None); + update_stage(naga::ShaderStage::Task, Some(&encoder), None); + update_stage(naga::ShaderStage::Mesh, Some(&encoder), None); // Call useResource on all textures and buffers used indirectly so they are alive for (resource, use_info) in group.resources_to_use.iter() { encoder.use_resource_at(resource.as_native(), use_info.uses, use_info.stages); } } - - if let Some(ref encoder) = self.state.compute { - let index_base = super::ResourceData { - buffers: group.counters.vs.buffers + group.counters.fs.buffers, - samplers: group.counters.vs.samplers + group.counters.fs.samplers, - textures: group.counters.vs.textures + group.counters.fs.textures, - }; - - let mut changes_sizes_buffer = false; - for index in 0..group.counters.cs.buffers { - let buf = &group.buffers[(index_base.buffers + index) as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - encoder.set_buffer( - (bg_info.base_resource_indices.cs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; - } - } - if changes_sizes_buffer { - if let Some((index, sizes)) = self.state.make_sizes_buffer_update( - naga::ShaderStage::Compute, - &mut self.temp.binding_sizes, - ) { - encoder.set_bytes( - index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), - ); - } - } - - for index in 0..group.counters.cs.samplers { - let res = group.samplers[(index_base.samplers + index) as usize]; - encoder.set_sampler_state( - (bg_info.base_resource_indices.cs.samplers + index) as u64, - Some(res.as_native()), - ); - } - for index in 0..group.counters.cs.textures { - let res = group.textures[(index_base.textures + index) as usize]; - encoder.set_texture( - (bg_info.base_resource_indices.cs.textures + index) as u64, - Some(res.as_native()), - ); - } - + if let Some(encoder) = compute_encoder { + update_stage(naga::ShaderStage::Compute, None, Some(&encoder)); // Call useResource on all textures and buffers used indirectly so they are alive for (resource, use_info) in group.resources_to_use.iter() { if !use_info.visible_in_compute { @@ -881,6 +863,20 @@ impl crate::CommandEncoder for super::CommandEncoder { state_pc.as_ptr().cast(), ) } + if stages.contains(wgt::ShaderStages::TASK) { + self.state.render.as_ref().unwrap().set_object_bytes( + layout.push_constants_infos.ts.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr().cast(), + ) + } + if stages.contains(wgt::ShaderStages::MESH) { + self.state.render.as_ref().unwrap().set_object_bytes( + layout.push_constants_infos.ms.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr().cast(), + ) + } } unsafe fn insert_debug_marker(&mut self, label: &str) { From d6931d2f0a2217b8a8ba28f23e15cae0b89f54f2 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sun, 24 Aug 2025 15:27:35 -0500 Subject: [PATCH 16/35] Applied some fixes --- wgpu-hal/src/metal/command.rs | 11 ++++++----- wgpu-hal/src/metal/device.rs | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 1e4ac8d2419..2ebf80f0f26 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1146,12 +1146,13 @@ impl crate::CommandEncoder for super::CommandEncoder { group_count_z: u32, ) { let encoder = self.state.render.as_ref().unwrap(); + let size = MTLSize { + width: group_count_x as u64, + height: group_count_y as u64, + depth: group_count_z as u64, + }; encoder.draw_mesh_threadgroups( - MTLSize { - width: group_count_x as u64, - height: group_count_y as u64, - depth: group_count_z as u64, - }, + size, self.state.stage_infos.ts.raw_wg_size, self.state.stage_infos.ms.raw_wg_size, ); diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index ca16a222efb..70753a3ff6c 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1264,7 +1264,7 @@ impl crate::Device for super::Device { sized_bindings: ts.sized_bindings, vertex_buffer_mappings: vec![], library: Some(ts.library), - raw_wg_size: Default::default(), + raw_wg_size: ts.wg_size, work_group_memory_sizes: vec![], }); } else { @@ -1291,7 +1291,7 @@ impl crate::Device for super::Device { sized_bindings: ms.sized_bindings, vertex_buffer_mappings: vec![], library: Some(ms.library), - raw_wg_size: Default::default(), + raw_wg_size: ms.wg_size, work_group_memory_sizes: vec![], }); } From 86d1877de42defc7f45609b2edfcebc3b94b32f9 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 00:07:59 -0500 Subject: [PATCH 17/35] MESH SHADERS ON METAL LMAO HAHA YESS --- wgpu-hal/src/metal/device.rs | 2 +- wgpu-types/src/lib.rs | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index ec663ce78a3..13b6853529c 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1258,7 +1258,7 @@ impl crate::Device for super::Device { primitive_class, naga::ShaderStage::Task, )?; - descriptor.set_mesh_function(Some(&ts.function)); + descriptor.set_object_function(Some(&ts.function)); if self.shared.private_caps.supports_mutability { Self::set_buffers_mutability( descriptor.mesh_buffers().unwrap(), diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 9b9cd1d3e02..3f062606040 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1045,14 +1045,12 @@ impl Limits { #[must_use] pub const fn using_recommended_minimum_mesh_shader_values(self) -> Self { Self { - // Literally just made this up as 256^2 or 2^16. - // My GPU supports 2^22, and compute shaders don't have this kind of limit. - // This very likely is never a real limiter + // I believe this is a common limit for apple devices. I'm not entirely sure why. max_task_workgroup_total_count: 1024, max_task_workgroups_per_dimension: 1024, // llvmpipe reports 0 multiview count, which just means no multiview is allowed max_mesh_multiview_count: 0, - // llvmpipe once again requires this to be 8. An RTX 3060 supports well over 1024. + // llvmpipe once again requires this to be <=8. An RTX 3060 supports well over 1024. max_mesh_output_layers: 8, ..self } From 00c19fc39c515b98ea93645a5bd03c27fa46c29f Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 00:28:57 -0500 Subject: [PATCH 18/35] Looked over all except command.rs --- wgpu-hal/src/metal/device.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 13b6853529c..f4e7592e9d8 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1111,7 +1111,7 @@ impl crate::Device for super::Device { >, ) -> Result { objc::rc::autoreleasepool(|| { - let (primitive_class, _raw_primitive_type) = + let (primitive_class, raw_primitive_type) = conv::map_primitive_topology(desc.primitive.topology); let vs_info; @@ -1323,9 +1323,6 @@ impl crate::Device for super::Device { ), }; - let (primitive_class, raw_primitive_type) = - conv::map_primitive_topology(desc.primitive.topology); - // Fragment shader let fs_info = match desc.fragment_stage { Some(ref stage) => { @@ -1461,7 +1458,7 @@ impl crate::Device for super::Device { wgt::ShaderStages::TASK | wgt::ShaderStages::MESH | wgt::ShaderStages::FRAGMENT, - format!("new_render_pipeline_state: {e:?}"), + format!("new_mesh_render_pipeline_state: {e:?}"), ) })?, }; From effe0f41a8bacdc23fbea5aeed8ea34f053b65ab Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 01:14:02 -0500 Subject: [PATCH 19/35] =?UTF-8?q?(Almost)=20everything=20passes=20?= =?UTF-8?q?=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wgpu-hal/src/metal/command.rs | 82 ++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 0799a76ff28..19c4fe6ffeb 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -681,7 +681,15 @@ impl crate::CommandEncoder for super::CommandEncoder { let mut update_stage = |stage: naga::ShaderStage, render_encoder: Option<&metal::RenderCommandEncoder>, - compute_encoder: Option<&metal::ComputeCommandEncoder>| { + compute_encoder: Option<&metal::ComputeCommandEncoder>, + index_base: super::ResourceData| { + let resource_indices = match stage { + naga::ShaderStage::Vertex => &bg_info.base_resource_indices.vs, + naga::ShaderStage::Fragment => &bg_info.base_resource_indices.fs, + naga::ShaderStage::Task => &bg_info.base_resource_indices.ts, + naga::ShaderStage::Mesh => &bg_info.base_resource_indices.ms, + naga::ShaderStage::Compute => &bg_info.base_resource_indices.cs, + }; let buffers = match stage { naga::ShaderStage::Vertex => group.counters.vs.buffers, naga::ShaderStage::Fragment => group.counters.fs.buffers, @@ -691,12 +699,12 @@ impl crate::CommandEncoder for super::CommandEncoder { }; let mut changes_sizes_buffer = false; for index in 0..buffers { - let buf = &group.buffers[index as usize]; + let buf = &group.buffers[(index_base.buffers + index) as usize]; let mut offset = buf.offset; if let Some(dyn_index) = buf.dynamic_index { offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; } - let a1 = (bg_info.base_resource_indices.vs.buffers + index) as u64; + let a1 = (resource_indices.buffers + index) as u64; let a2 = Some(buf.ptr.as_native()); let a3 = offset; match stage { @@ -760,8 +768,8 @@ impl crate::CommandEncoder for super::CommandEncoder { naga::ShaderStage::Compute => group.counters.cs.samplers, }; for index in 0..samplers { - let res = group.samplers[(group.counters.vs.samplers + index) as usize]; - let a1 = (bg_info.base_resource_indices.fs.samplers + index) as u64; + let res = group.samplers[(index_base.samplers + index) as usize]; + let a1 = (resource_indices.samplers + index) as u64; let a2 = Some(res.as_native()); match stage { naga::ShaderStage::Vertex => { @@ -790,8 +798,8 @@ impl crate::CommandEncoder for super::CommandEncoder { naga::ShaderStage::Compute => group.counters.cs.textures, }; for index in 0..textures { - let res = group.textures[index as usize]; - let a1 = (bg_info.base_resource_indices.vs.textures + index) as u64; + let res = group.textures[(index_base.textures + index) as usize]; + let a1 = (resource_indices.textures + index) as u64; let a2 = Some(res.as_native()); match stage { naga::ShaderStage::Vertex => { @@ -809,17 +817,67 @@ impl crate::CommandEncoder for super::CommandEncoder { } }; if let Some(encoder) = render_encoder { - update_stage(naga::ShaderStage::Vertex, Some(&encoder), None); - update_stage(naga::ShaderStage::Fragment, Some(&encoder), None); - update_stage(naga::ShaderStage::Task, Some(&encoder), None); - update_stage(naga::ShaderStage::Mesh, Some(&encoder), None); + update_stage( + naga::ShaderStage::Vertex, + Some(&encoder), + None, + // All zeros, as vs comes first + super::ResourceData::default(), + ); + update_stage( + naga::ShaderStage::Task, + Some(&encoder), + None, + // All zeros, as ts comes first + super::ResourceData::default(), + ); + update_stage( + naga::ShaderStage::Mesh, + Some(&encoder), + None, + group.counters.ts.clone(), + ); + update_stage( + naga::ShaderStage::Fragment, + Some(&encoder), + None, + super::ResourceData { + buffers: group.counters.vs.buffers + + group.counters.ts.buffers + + group.counters.ms.buffers, + textures: group.counters.vs.textures + + group.counters.ts.textures + + group.counters.ms.textures, + samplers: group.counters.vs.samplers + + group.counters.ts.samplers + + group.counters.ms.samplers, + }, + ); // Call useResource on all textures and buffers used indirectly so they are alive for (resource, use_info) in group.resources_to_use.iter() { encoder.use_resource_at(resource.as_native(), use_info.uses, use_info.stages); } } if let Some(encoder) = compute_encoder { - update_stage(naga::ShaderStage::Compute, None, Some(&encoder)); + update_stage( + naga::ShaderStage::Compute, + None, + Some(&encoder), + super::ResourceData { + buffers: group.counters.vs.buffers + + group.counters.ts.buffers + + group.counters.ms.buffers + + group.counters.fs.buffers, + textures: group.counters.vs.textures + + group.counters.ts.textures + + group.counters.ms.textures + + group.counters.fs.textures, + samplers: group.counters.vs.samplers + + group.counters.ts.samplers + + group.counters.ms.samplers + + group.counters.fs.samplers, + }, + ); // Call useResource on all textures and buffers used indirectly so they are alive for (resource, use_info) in group.resources_to_use.iter() { if !use_info.visible_in_compute { From 27d595e91faa0f8438642103ada78cdee157e0bf Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 01:20:34 -0500 Subject: [PATCH 20/35] Another quick fix (still 2 failing) --- wgpu-hal/src/metal/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index fda7e001906..27e357cf4c9 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -653,11 +653,15 @@ impl MultiStageData { iter::once(&self.vs) .chain(iter::once(&self.fs)) .chain(iter::once(&self.cs)) + .chain(iter::once(&self.ts)) + .chain(iter::once(&self.ms)) } fn iter_mut<'a>(&'a mut self) -> impl Iterator { iter::once(&mut self.vs) .chain(iter::once(&mut self.fs)) .chain(iter::once(&mut self.cs)) + .chain(iter::once(&mut self.ts)) + .chain(iter::once(&mut self.ms)) } } From fb7e24c0d45a7e2b7662efb94876fffab14e8a15 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 01:32:40 -0500 Subject: [PATCH 21/35] Update changelog.md --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 011f59392e2..13f4c1f1f43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,9 @@ SamplerDescriptor { - Texture now has `from_custom`. By @R-Cramer4 in [#8315](https://github.com/gfx-rs/wgpu/pull/8315). +#### Metal +- Add support for mesh shaders. By @SupaMaggie70Incorporated in [#8139](https://github.com/gfx-rs/wgpu/pull/8139) + ### Bug Fixes #### General @@ -313,9 +316,6 @@ By @wumpf in [#8282](https://github.com/gfx-rs/wgpu/pull/8282), [#8285](https:// - Allow disabling waiting for latency waitable object. By @marcpabst in [#7400](https://github.com/gfx-rs/wgpu/pull/7400) - Add mesh shader support, including to the example. By @SupaMaggie70Incorporated in [#8110](https://github.com/gfx-rs/wgpu/issues/8110) -#### Metal -- Add support for mesh shaders. By @SupaMaggie70Incorporated in [#8139](https://github.com/gfx-rs/wgpu/pull/8139) - ### Bug Fixes #### General From 233d76f09a8320c291e1fed5bb1a725e512f76b9 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 02:05:15 -0500 Subject: [PATCH 22/35] Added little bit to explain something --- wgpu-hal/src/metal/command.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 19c4fe6ffeb..0b76cd47bfc 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1029,7 +1029,33 @@ impl crate::CommandEncoder for super::CommandEncoder { ); } } - if pipeline.ms_info.is_some() { + if let Some(_ms_info) = &pipeline.ms_info { + // TODO: + // https://developer.apple.com/documentation/metal/mtlrendercommandencoder/setthreadgroupmemorylength(_:offset:index:) + // doesn't exist in current metal-rs version for some reason. Maybe put it off until objc2 arrives? + // Also, this will need to be added to the task stage + /* + // update the threadgroup memory sizes + while self.state.stage_infos.ms.work_group_memory_sizes.len() + < ms_info.work_group_memory_sizes.len() + { + self.state.stage_infos.ms.work_group_memory_sizes.push(0); + } + for (index, (cur_size, pipeline_size)) in self + .state + .stage_infos + .ms + .work_group_memory_sizes + .iter_mut() + .zip(ms_info.work_group_memory_sizes.iter()) + .enumerate() + { + let size = pipeline_size.next_multiple_of(16); + if *cur_size != size { + *cur_size = size; + encoder.set_threadgroup_memory_length(index as _, size as _); + } + }*/ if let Some((index, sizes)) = self .state .make_sizes_buffer_update(naga::ShaderStage::Mesh, &mut self.temp.binding_sizes) From 2d60810c4194d6fa000088d0661977bf4b534db6 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 02:34:07 -0500 Subject: [PATCH 23/35] More tiny incremental upgrades --- wgpu-hal/src/metal/command.rs | 5 ----- wgpu-hal/src/metal/device.rs | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 0b76cd47bfc..d43f0aefed7 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1420,11 +1420,6 @@ impl crate::CommandEncoder for super::CommandEncoder { } // update the threadgroup memory sizes - while self.state.stage_infos.cs.work_group_memory_sizes.len() - < pipeline.cs_info.work_group_memory_sizes.len() - { - self.state.stage_infos.cs.work_group_memory_sizes.push(0); - } for (index, (cur_size, pipeline_size)) in self .state .stage_infos diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index f4e7592e9d8..a19a14fb074 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1272,7 +1272,7 @@ impl crate::Device for super::Device { vertex_buffer_mappings: vec![], library: Some(ts.library), raw_wg_size: ts.wg_size, - work_group_memory_sizes: vec![], + work_group_memory_sizes: ts.wg_memory_sizes, }); } else { ts_info = None; @@ -1299,7 +1299,7 @@ impl crate::Device for super::Device { vertex_buffer_mappings: vec![], library: Some(ms.library), raw_wg_size: ms.wg_size, - work_group_memory_sizes: vec![], + work_group_memory_sizes: ms.wg_memory_sizes, }); } MetalGenericRenderPipelineDescriptor::Mesh(descriptor) From 204c542f4af2b0b9099c2fd8a471434738786924 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 03:50:51 -0500 Subject: [PATCH 24/35] Am I ... whatever its bedtime --- wgpu-hal/src/metal/command.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index d43f0aefed7..3d9ffcf783f 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1403,6 +1403,8 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + let previous_sizes = + core::mem::take(&mut self.state.stage_infos.cs.work_group_memory_sizes); self.state.stage_infos.cs.assign_from(&pipeline.cs_info); let encoder = self.state.compute.as_ref().unwrap(); @@ -1420,19 +1422,23 @@ impl crate::CommandEncoder for super::CommandEncoder { } // update the threadgroup memory sizes - for (index, (cur_size, pipeline_size)) in self + for (i, current_size) in self .state .stage_infos .cs .work_group_memory_sizes .iter_mut() - .zip(pipeline.cs_info.work_group_memory_sizes.iter()) .enumerate() { - let size = pipeline_size.next_multiple_of(16); - if *cur_size != size { - *cur_size = size; - encoder.set_threadgroup_memory_length(index as _, size as _); + let prev_size = if i < previous_sizes.len() { + previous_sizes[i] + } else { + u32::MAX + }; + let size: u32 = current_size.next_multiple_of(16); + *current_size = size; + if size != prev_size { + encoder.set_threadgroup_memory_length(i as _, size as _); } } } From 7e6dee67972975b5420793f9dab5dd2ae3d7e81c Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Sat, 11 Oct 2025 14:22:50 -0500 Subject: [PATCH 25/35] Did some work --- wgpu-hal/src/metal/command.rs | 44 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 3d9ffcf783f..46cf52716c8 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1017,27 +1017,10 @@ impl crate::CommandEncoder for super::CommandEncoder { ); } } - if pipeline.ts_info.is_some() { - if let Some((index, sizes)) = self - .state - .make_sizes_buffer_update(naga::ShaderStage::Task, &mut self.temp.binding_sizes) - { - encoder.set_object_bytes( - index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), - ); - } - } - if let Some(_ms_info) = &pipeline.ms_info { - // TODO: - // https://developer.apple.com/documentation/metal/mtlrendercommandencoder/setthreadgroupmemorylength(_:offset:index:) - // doesn't exist in current metal-rs version for some reason. Maybe put it off until objc2 arrives? - // Also, this will need to be added to the task stage - /* + if let Some(ts_info) = &pipeline.ts_info { // update the threadgroup memory sizes while self.state.stage_infos.ms.work_group_memory_sizes.len() - < ms_info.work_group_memory_sizes.len() + < ts_info.work_group_memory_sizes.len() { self.state.stage_infos.ms.work_group_memory_sizes.push(0); } @@ -1047,15 +1030,32 @@ impl crate::CommandEncoder for super::CommandEncoder { .ms .work_group_memory_sizes .iter_mut() - .zip(ms_info.work_group_memory_sizes.iter()) + .zip(ts_info.work_group_memory_sizes.iter()) .enumerate() { let size = pipeline_size.next_multiple_of(16); if *cur_size != size { *cur_size = size; - encoder.set_threadgroup_memory_length(index as _, size as _); + encoder.set_object_threadgroup_memory_length(index as _, size as _); } - }*/ + } + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Task, &mut self.temp.binding_sizes) + { + encoder.set_object_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr().cast(), + ); + } + } + if let Some(_ms_info) = &pipeline.ms_info { + // So there isn't an equivalent to + // https://developer.apple.com/documentation/metal/mtlrendercommandencoder/setthreadgroupmemorylength(_:offset:index:) + // for mesh shaders. This is probably because the CPU has less control over the dispatch sizes and such. Interestingly + // it also affects mesh shaders without task/object shaders, even though none of compute, task or fragment shaders + // behave this way. if let Some((index, sizes)) = self .state .make_sizes_buffer_update(naga::ShaderStage::Mesh, &mut self.temp.binding_sizes) From 69b97953ce0fd98814e398a0b8aec4aa913045c2 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Wed, 29 Oct 2025 14:03:30 -0500 Subject: [PATCH 26/35] Some tweaks --- examples/features/src/lib.rs | 1 + examples/features/src/mesh_shader/mod.rs | 37 ++++++--- .../features/src/mesh_shader/screenshot.png | Bin 0 -> 34256 bytes tests/tests/wgpu-gpu/mesh_shader/mod.rs | 41 +++++++--- tests/tests/wgpu-gpu/mesh_shader/shader.metal | 77 ++++++++++++++++++ wgpu-types/src/features.rs | 5 +- wgpu-types/src/lib.rs | 2 +- 7 files changed, 138 insertions(+), 25 deletions(-) create mode 100644 examples/features/src/mesh_shader/screenshot.png create mode 100644 tests/tests/wgpu-gpu/mesh_shader/shader.metal diff --git a/examples/features/src/lib.rs b/examples/features/src/lib.rs index baacf6a6b39..05f2db5ef21 100644 --- a/examples/features/src/lib.rs +++ b/examples/features/src/lib.rs @@ -48,6 +48,7 @@ fn all_tests() -> Vec { cube::TEST, cube::TEST_LINES, hello_synchronization::tests::SYNC, + mesh_shader::TEST, mipmap::TEST, mipmap::TEST_QUERY, msaa_line::TEST, diff --git a/examples/features/src/mesh_shader/mod.rs b/examples/features/src/mesh_shader/mod.rs index 2916e0fadcf..33ea10ba59d 100644 --- a/examples/features/src/mesh_shader/mod.rs +++ b/examples/features/src/mesh_shader/mod.rs @@ -83,26 +83,23 @@ impl crate::framework::Example for Example { device: &wgpu::Device, _queue: &wgpu::Queue, ) -> Self { - let (ts, ms, fs) = if adapter.get_info().backend == wgpu::Backend::Vulkan { - ( + let (ts, ms, fs) = match adapter.get_info().backend { + wgpu::Backend::Vulkan => ( compile_glsl(device, "task"), compile_glsl(device, "mesh"), compile_glsl(device, "frag"), - ) - } else if adapter.get_info().backend == wgpu::Backend::Dx12 { - ( + ), + wgpu::Backend::Dx12 => ( compile_hlsl(device, "Task", "as"), compile_hlsl(device, "Mesh", "ms"), compile_hlsl(device, "Frag", "ps"), - ) - } else if adapter.get_info().backend == wgpu::Backend::Metal { - ( + ), + wgpu::Backend::Metal => ( compile_msl(device, "taskShader"), compile_msl(device, "meshShader"), compile_msl(device, "fragShader"), - ) - } else { - panic!("Example can only run on vulkan or dx12"); + ), + _ => panic!("Example can currently only run on vulkan, dx12 or metal"), }; let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, @@ -196,3 +193,21 @@ impl crate::framework::Example for Example { pub fn main() { crate::framework::run::("mesh_shader"); } + +#[cfg(test)] +#[wgpu_test::gpu_test] +pub static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { + name: "mesh_shader", + image_path: "/examples/features/src/mesh_shader/screenshot.png", + width: 1024, + height: 768, + optional_features: wgpu::Features::default(), + base_test_parameters: wgpu_test::TestParameters::default() + .features( + wgpu::Features::EXPERIMENTAL_MESH_SHADER + | wgpu::Features::EXPERIMENTAL_PASSTHROUGH_SHADERS, + ) + .limits(wgpu::Limits::defaults().using_recommended_minimum_mesh_shader_values()), + comparisons: &[wgpu_test::ComparisonType::Mean(0.01)], + _phantom: std::marker::PhantomData::, +}; diff --git a/examples/features/src/mesh_shader/screenshot.png b/examples/features/src/mesh_shader/screenshot.png new file mode 100644 index 0000000000000000000000000000000000000000..df76e1415048224c23423e3cb5610936c91f886d GIT binary patch literal 34256 zcmb_l3tY|T|Now@Qq71I(#HNVa&5wj(=OP>-6*$;C|jwlQKZf>mu+s%%q5qO%g;nO zsUeheF!ywJNl9_qY_)~T((Tka&;R}Tp0e)FDSN$YI(@&-^SR%jr*9@t9B0|STYHY< zEI<6<{iz&hgnt@x?M(2$bLYBPa@^V-AHM(2Ct>%VsNGWw$M2AqZua}dC4Wyy+DLp` zkiX~dNYiEO>~4(od-a1o!9NL1KltE-cCWu~^lm#N1Ea6|zMtlpksYTjth_I=KfliZ z`?vr$@SkmmpG;xh%0}I}KfNSD)Rb#(3$P`}^}HSQFTe@wD#|G;&c0&LW@`*N zuILg0H0X~g^#F4w(d=N8nA*%4Z-jM`{n3TvihEw@6&YbDml@#S1FMT7InMLaqyiRX zB})<%_esFZVV2JHT_8NlBuxDw|FZFDERk!x@+g!2lX*g2ZI1^=TM!g!+g0rB>&1N% z=^a=8&B5UXEc2R3w0;6TuxS$gX7IQImM_8@X9v@deFxx26S?Fjz{ov$7a!%WT9D49 zQIABs__w~{-+Ik2T_nNI#Q`>W#C4++Eh2i`oa4mT<^fx==`RO0B2Bs9U2r3JN(H`) zO{eVFR54IS?lccC6~Edqohg`RyfuD^x=7UNuy3lGMN1USaW}1MAfd;&qjy9qA;&3J zV?X(A+x`5ygjLcg!YXMkKKNVJ*23*?bX8sZ+yMgy2!TS6-7P5da?z=S)DvBKh$ zw2tLjY?aLNvk1QJJbc|hKcyPkmi_$vq!7x>foQdXkz^2TH0 z*}v1nfj_k`kSVZaQXe9F;HCB;#n?+;*NK46JONFP{ZAwEtlAKOkAlrt;=A0^W4gNh zO*~(|ilF)9Sgb}A_*V55+G_hXqJ?>4g38`LU-S*3wf$-ze0NNp3##Mwc%H-7f4zdO z)Uxjhs|4D~`<+7Uq^TT~Xoioz zc0fzNj4x@Y1u=w^LG>NN=?5Po={K%3LG-drey4sUx!%1dxwD8@@c7%rshvM{hfMg3 z29O|q%A;fuG?fXZ>ZK&@JHI61+bQ3DM;1aAi~t>*hQLFMIEqJu>2fftF`bGTB8ssktI z1>9ntim6j1Q6eh>>~|ip*V5S&&FRgeM0#i4A0!JV@2U?&*W=sME++4&g#aT;WKQqQ zF#>-TH&AODs z`(7;&mfCaT>GSaW$ws9zFEw-3*g4{-oALO)n}D`iv03PY4aD7u0`$>}QsrT=IGtF! z&#FL_qGHRdU1s8^FL>^}63epHr9B?*PL^9&Uq=-q^44l4?XfnI}>G(*S(?R;-fU z_fib$6fOLB_hJC~?Ij}o8~Zeh=`5lrWMVnJ`PRE?iB!Wn5qi5wcijxN=xw>46L*q` z(w!u75mq~)Dhk0r?-3E?z_>J>c^#dHAA-LoFqaXRGeb7M@7v>9)x#9!0wT}cY7rjr zHr0~!upH0jc8_I`fMaH4cLG*9pYXQSQm6M8TT}Jbv{PQRv)qA~VZ#EJ0>#8r!4~vr zhgeoVjf7V1f!C`$5qK4X0=7&v>e>ejuD&DNlH^NUd;LWCR&%9HqctISc@>quqn63Q znW{=*X&xvqQ!mVWUm)@o;Z^>e-=`Er($^C3+2l z$3cd~q3>z=^G)iAzlXP%l42y=5lcNSOW7#x-a%i13}FM(Sw1iisa_4x#a&4*ia)A@ z`PiqY>{)e()7UStZuMsUC^!3t(h3qn!QBU3yNX4p2>Zq$v;VZg8yK|pT zW=Gh#Q!E&4%Qe0W1YNt;A^Vh-$NhTCA=5S?_WNs#GNQvB$bf_mf!fFoNg@2Sa%%6LF9{v3O7QB;2tz-DMAIi8 zgda}9RIc@8cBbml$g#bgUQuoO!@oa%Puh`VOq2~2@RYsx3&a~sgs#En9zCJuqG50s z3idux*Mz9+Xjv*cFN^VcW9Y+u%Dxwt^(kdrMc=`W$^RfRiGNEAjUSW@EY1;NYwwxF z=R0@UB$AbFq@f;AggTslk_Phb6eZ9y2l!>)_z$TbmuXv504N$xzDu|kF5Kf)lJ`UP zDH8*wpAW*scOy&VsFjv`U7&DyW5!zA*917RLY|pN9I@NGJWhgoIF&6ZWmY6G(~F_G z-ev^-2k9qHXsSS(_+CWfFX*$wN0?+OdkduT76bssAISN@u-9l`%7+kNiY<2XVb(O_ zZSN`Jex*vVR-8rh`x|jea)Pv2LNUhppNL%T>xt z_*iz10Iy8eLZSd&$~9*RBaZm=QGKn9WoldHCSp`91&-pWT7FMuq?ol)0sb_sSwh1K5BilF?!on zY-ZIUief*Y5YDa6T1RJb+$>sa7s>e%7E+_`{8AI@+n#i$**FC{SShrY8j>vBtraK{TlvZHNnpkR5-TMI zoYPn)?5hHSoOl!ogL7kHNKA*q9U_HO@*cxMt13rWRsO-xB9M|@m*d6iPCPG}Cx9D? z*dczcCD<~kz6YMds+?v_VZzECWb{|^%i)n)!5Dos<1hemBIsmVVU?XhFhjPWnz#MZ z`Zk4MZ#u1)jrFQ|J6GMWA$)&RnzT{Alit8)q4 zF2FXol;p#cz<`%7V4BL}4j2Cd;0O7<~AaTr6n3;BdWt7{|NB|iss_Bx1s*maDW{pqz33weHpWF~hZ$?2BZ zLz*H<)X~y6VL*(a3t=6#oVZ`Zi3h}qFtd7Av|Fi6`0xmc$!8FgVm{0tq%~dxN}r2K z?|Hxq=?w{RI!hs4NH!$81vjSKY@*vRqT5rVThB&xJ0S*A9oF%fPGe5_jsVKp8CdLZ ztPN)n8C3ftte5*xuz&QvDgoQR{VYtdYPMWYx-z*!S;uQvkDqChm85uPJ?JhgUKDx!>r0dOib$G1R> z8f3jr-er~5O4vB@Y>G1j+c$x6=CdHIRk(qqM zoy!N1*bQ$ic9Yp76AfstfITm$%r75gS zCAO5dW>82PKcX%nm0HJMXbg&3*9xIK)VK0XJoJWgO8J4Ncs-Ym@l3R&n-kKmW(Qaco>C7 zTbq)|bDD9agW05mUyu{QQ`$U1DYT#du{ubSA7IIOyF!q5)~cZp1)4R$J%r?FYyDW1 zqKb5OP9yyUxh;&pcsbG2p}r(3M^EzK1lD`^q-ATNl~LeW8A@zwPbxXTk?}Dpm7QSG zux6x|D+t=IwdsgVL?TKHTY_DAm!X>GTHBq@h9k7g)a-SHTz7_53+~v+M6OnOWs>Ci zZ6e`+g@`+*A#s13^`6>Pyp(K+agDGdrmlJ{>IyG7@FM7r0#Hq@;<+U!wt0bEKRIyR zMv~USHg!#0IxAZ~%l~MCfLjVB@UE4wNeDJIR66nQ3qp9m+BCd-VTnm%M-U-+vqtG% zE!z&C^8IrJ=SCL5rk29fn1W(cZwgNK*)$5iNLv}DBuRXQrZ2%enat61zgQruay>pQ zUBr3V17B2~w6dGdE3N8-1T~sANnLd7!R{ys+b{&>;)zX*L?2=>NZ}?VBZG%1aiKt#?0$Csr^jb5>A`su z$Y0c&YXUj-l16zA+A%NyJpHigrH3QtGklDaYzl<5-ZDdM%3@ zR$N3L{!J|HRzK;Z;FPEIZLJ``76s$|T5U$mhaXxp_up7DlH;CT1l~q<$Z_IF6OAEc z`2#9Sc57Ovk1!>fo_pj}Q5vgj#YshOALYwAuTcrRTOF{=KJXWV)QgGKhE37~$hok0 zc0-B2CYQrC7G@Net9oE-(vw%}fPLZDZr&isXwqzy&KiVcq}ee3%ZbwEWHBz(4N?Y_ z$|7PicAZC#W6Ft(ARuy+I!njQv#ehEMEDa}Ie}Pd+N8c9|9Ay}7vG}gM-$fd)rAU?G4vOF1YDR4SdeVpr1m37h+?r&(Lt0FdYU&@Q54Nl ziO+pPt9Poix)R14J|Xuw!Hx1>)BJr~Cc)xCRMPaMFuk#1;S4!>TwRgei{$JcvoYu7 zx8THbhc!q4rI9j)Km0xMv0BU z>L1hUs14RF79n9n6gU^=1gjt9R~OLgu1&8l0Z~v()J8`{a&na9e~Z6>ScW+^BbK7Q z%0byBqDu~lCMh!HON>+(p!i%X`Y2E?U->pllC(ozvXjr`cld;M=+XQR(lb;Gj3KRc zeWkJ1#a*H6urnwBh_IkagEp2cUx;0tM0vu? z23)WcxSeF#0=Ez3-+T&X$RTCGgH0%df}$BOlzFH^inNa}&UJ2austYpsFDY;uwSw*+cTU)!)D&7P{1p#*GUBtf)bL42= zWXOOVBfmWE@8u|E-w>sNXlvon#0#5p!lIZg-lrXV1yFz20HbJI*4vTe;t;FLKW`Bu z3R@{3^`PR6N7IV3Ut{Rj-2$P1ZRn0zA^;@?U*BraC!!c*&U*;hgOY3AQg5x-m#AcE zMee_pphZlz))~~z+TPgLk;2bFgR2wgJ>3RRP!B^JYei(iDy61Yj7R%Z-VYix*sjHm zD5`xTx(MPKql_y4r%An3?B!ci9%d$-_(&t-DW}~s;0AYx`}^UYdZk_}%fzy!Y800$ z)%{j@Y6D$yG%3$pO{!Rnw@k#=!0MwqA%-(*WKK?z3l=XmL6x}+aQdP}oZzphf!&4n zp~7~fI0L4Y_Myu6+(ybIOj!2p2e}BfgMC)esEIGw>Wm*MMnA8Hf!#yZjK*OB^e>AG zHUZD4HL+J&!2|*5mk2xYZPVFl4gJQeg%Y(Fif6e-kO@57>|Q?`8fq)xb^Q`aU)tTy z6;CnU<3>p!%Hhx3=xE;6mIaQmN7a#CMonXn5};MrsW5!INsH8>R5sAX#Fq=}04k!6 zUe7~C+k%Rx$tmmw89QRTS6;B4f1)Tk&`InF)?okl(CW8p{@&S3GuEeEX25T=6kH zm*fZ^n|bX>`E?H$gF2@i6@INlOSG<9GCy_ZD4~fjMK~l$59Vr<2|uVkhKhdU=RfMz zRBEJl<(ajSKa>R`M=NF9p_7F4^MMDc$20qsGxZArHBgLNk#>Huqk#jp7!uqw0%N7V>~On`89C83PxM zdn0w6@P+Jmh106uLV@z!1TJ?kj8xd`>Fl)e%Cfzu^E^-fEnE>)+ey$SjC@!w8yNm< zPiVG>Q7}B2w%?G>_9WQ%$q)b4*KLf#Z)jS^`#$J_dm_rYeoxzS6_>-Wp$oy!5pB87 zPYh7Q;b1SP-RFCyv+9-js+j&6b$UHn5j4Sb?IghejyhXKF|#wnJc*))a=fsFDEi^H1y@6{tlDz`ct_FFM}>ypzB-TY%&$xxD)3y_y&G>;yv zw!}zBBiFZaeQH4A`pK-MWC|;J=zb((H}8~k?T9#-0MeN_gS(pWJrOec;bVK`9MnYJ z9x>Z$6NV<1^I--0J(=vhBN9m%+@2PVg4)>Ysd*>bl?sLS_=?T*p_WbA#t}*5)b_jUQDrI%$(+f_sY>hmYNHr&%9iNcb2}D@ zB1hw^x*r;&5t?vF?>OWoxvO-PUeEm+<>Hb~ZY}@?Ax8_KRGmoV#Q7vIjjNdr<>d$) zi8_!1p)qx9&J4&+Ec$U+`T8;Jm+%dZ(qi&|2lM}@F^omu$<>HgWo1l9)ACs6<5X8@ zXBCJRGZtZuBo_MI;rU$IbG!1%0^bn#H7bb&#Tk3qN65U3qG8e$a=>AzBOH5G^ycP*^tqjReVH5=R>I((-kS23<36nc;!TyMMb8HGk*#@I&rmIC0H?cq{?g-1W zTWeXeo&Lz}St<&+npMZNe5{uJz$Dk=(Ud}RgjUgubomJLkz@goNd(BNx&yL>0MRB) z%>jbxfImf;h@~mo+_5?k#j7NhrWcdC!l?&h!f~-c5^?zWS$9WO)PvbH8C19U!v|$O zN9Es7XXh8MF{nV1K)hQU+cpD`w1qf|oYhXE7bLIEv*q3_kG zV}~?K|1A*Sj4LM85__@4WV)7Q9aAEIkEeXYdW}kXTCBM6TSiWQYxTII0otLADX>7) z3_&Gozb12GRLoXY2X6-pB-4C=(wx05pZzBrvCfz~2BGnOT{o&6C92FKs(mXoTSg+X)UqV7vvo!BTKcLNmx2WMm9KHu_DpThFB6{|qSG{J}I+ds=O0{KwkHkRW z49sKA(e}}sQ6@GQu^RmJ3b=wM;;E)#*=tuKJY~ksA=03^sH0F|CqCH$9;yzdM=H0U<_R+&k6DoB!49fY!f-EXt~pL;tVUvp%Ni^v?>d z3oSU7c?2lkUQNC`5ybG7qW2A?W)S2=G>xdCfv`B^^SV_bt?}8O4%;I6KT6f z{ZKFyVe?{$7LHr!=<0=`<4dn{;s|h2+xnL^YbS3G@hiD-9t7JA>3Xb(bRCJzfY?g` z+j+otca`q2rEUYwA`OxJQ7Z+FW`eH-K^u;A#>NOd*_p<2FpIxeDLs(SmzglD z3&#ihRp2)&oB*1Qh7c0UgS&J|5fyfcs;gWv41Cxq9l#l@-dmx>k;?8|*ziVCZcCsY zKj&w~1z~z7421!miVNxNtWRP55dLHa-*s;`ic-U(bXj$zQ7I+duVGT7KE5bg^Q&Q1ryov3zs`f!dTumS11)Fa)jqOoqv<`nf3 zw!gNZ@X4JiEvSQgDRL<~|3gdYOvRYfL-=z(o`x;OqBT^Ui6D@LTXUY!hvfRlXO4&V ze$C}X=ob66K10jP3K|yJp1Ujxk*2na6OId#g-Z8JAP}IdH#CTr6JMb;Y(?#v8H>$m-qQ}|IW>m=RP+|ln5aJr#zZr3@ zvhKcbeoO|zDj~VfI;d879IslT=$GM~UEQ=zFV#BsD}(QV} zlOljYa0KBx&Q!2w1(>nH!UzDG^z zF34JvvZ1P`O zb|(#v{#UUGxrPJNGAr*fHk^kpA`6EY>A`u}c5is1P%pRTaa>VUm}MkC3LE=zGim9) zGk3Wz{PbK+0TnfbLW~Yqgc=;fQQliRNNto#bRv`CfV(+L%ov)fn+}!+8ZEe1I`H3v z!Q=BDc-m>FK>2})h5_h`rS@^9St<)>0UdLcPt{u%KG|qI_A)0Qs)OVn)MVnMiToo7 zLxYS=Php6OFxky{Kz8XgWlc-8e)Xz(JsX>Eux&6+_fT`Ph3p>|ES(KW(Rj_|JP@ny ztba^IKUK@{+h*%+#U&!D#;MLG>lDok#L&09+6WA_hdb#MW0R%h*?+ok9=UrQ+79m4 zoG%1oTK)3_dYRlisvZkXs+9o--kl&4o%JTMZ<^V&OZ=#j4hBN;&o*PREkxo_o9u(< zLX*2j@;gvDbbR%taLB}c?M=9W+H;vL#tP{4Z1S9mxh1%Y`sZ@=H4(PH@=4W7gkrr` z8i+q`Ccu3%J3zd?e)V1$ivDM{i!E9%&iPA%Y-$h4HdB$+hUBgV?CliI&RYiZE}-<} zdMWBDoUDJG^k7V6=gl{Avg4qn&D0-MGo^(-r$*x}HkzyQMF1OZQawG##+Z`8-6dfr z7Mz7{)ZXh^{`pMK<{L}SqW zL9_ocyX?T3V44AIHC;goIw|Ov_v!KH^9&OUkZq^4r$r0U$4}^8>}tdH{p!UbCB$T} zrxVTyZzOdTPzLX%8+NX&c1%pz|Gj1`?(GI#<7fi3n%aQPfXyRA>#a^nTd)PZ1svXm#mI7 zT*Z%-y&&=6r9@`3S#9UR{S85x*d`}l*%`wr7bb9f&?*>V+)U>J_o;l6N=L3RGcKdy z6e^M6mKo)ZzeoB#Gym}t=cb+TevxNfx+3NCRSE;SZ2_X-3{(;#hSzLx7GKX0`$Kh5cYiBR0%4Ig*H(N1(G&cVaXUw(ZjtNK73y=p z-<4E~61ieSXB>QLQ*YhWZl%nWUEUZuuFRs^ z$S@b9iAPwMQwQ0~)2v|Asu4*$Zk#RoZ^-X<*5gF>RmNxLn0lpAdB1@6sVZT*Um5!m z^^z+4A(?h6k5ie_?wc3i_7x=KSiqo`wEWyr_V8hx&*S!=JQIF4{;zdzozu?z8WgZ! z5|?sWmM6Su9ZM~v+K_Fhpm&J-5YqsIWLyD~+_{-TO0yaC)?Y%>{tjKBkVg&XJhjJ_ zUmR{m-Vc796<|%bF%;K74WMttd4vti8t21g&H}o(!4v(ICP$K9q(1pJBdl?xw78cJ z)z&F2^PkcQR2CA4&NIyYzcy-*e?p?qVuNH;5Z|b^#2*suh|3Xa4pWD$O8VO)y6DG) zE0UYYFSnsud`I;^P3D5~vQn04RvZQ$!g&jQGj7GmgUT(Vq8^(oRtbu|b*28Jbw1LY zz5-WhLWh<#;f109RuxRZ^gWBMa?e&6(P(FDTy^j8(eoEL7n&N<5T|Yo$h5qcMhM_M zLWlZ?EcA5Pguq;kz5mP3m=)t#I$gohS{}rO1|Bmv%zZ~6BvLgUt>uaFg5p=(lsJ+a zA7o1@NEPSjT8Y1EW`3l_@Y+_r`SJ1vUMV3yXli5HRL1}$aKtj-vYw7&KW{@CAgw!d zsAuhID)UdEJJl-p1{mb@l%R^oz#=V=H%L$kKVQE* zUqKF(VtAXCBITZ}Fxx0;&}c!@o2|&u_WV+GJ!V|luyezYK`qwlB~gXZyN!b<1#meQ z^`^4)ETT**5KSm6VCR?%(69p?tdWZ zPiMOxpJ$KMMf0t^akB%h)>?j?x)gCtU`P|Wdgw-Z+TODY0}p7&hWdk9 zY1u{|2$&pO8}jCFi8vl??|%{Z3+hyE)&}j-St%jqIevm;-&l&Tv?)g=*Db_hUy=K6 z?$+;Cw3_|}Bnog{6QnLV*Lv90-mBAMmr|Rn4)5XObUw83W|gD(6y&O1o8{_^`qB9b z;-$rAbU{z+TzJo{OuIs#ffiz8ebwVS(7u<~Jz&$a0iCKeuSZ$e|! zdbyB@!&h9)y&awD7V$PTRXKT(dG{DCu&LOwN5vv;H$J=Ae4l+|42p!k z!+U1!sES=D7);j!y|`!jpA%6of0|}s12w6C)w(DL1(Ta%iw8&0YR=L<99Uiewnv!#EkqxJFK2GUi zDp2{5uPSs4;J#>chyw8nk+AXvv!8L&vAyQIX-p>NFD0*kfbE5Ko!(WS+q+-n{?;p< zT2_=Z33*UaBBzQM1lPZet94wFR8>pL7aQ!sl!u+@1}@vm+|&5r*$b;YTO!Uz#m(dP z=!*>ft%G@jJ(o=yTrAdVT*d*HKcOz)v`Jm0zZZ7z>*UShYJNJw{^AB6AJIpwxe{Kb zR_-?b_L7NP?>5I?eH`P*CETkzY4-ExNnEEk@y6@&{o#{+FVPUY9=SV&4IT}$hQBWm zTz_a@&rys%r80*qGxNt=j%gwT40^!ruam85y|>Jf{{ByQM|U|5zUzmNbK(UwA0yts8>cgiWdW= z`X>lllYd<@2!A6KmqVaIf&Z_?hMQ=)wH;chUlzO%V%lBkh3iik_yr*q7!fs@;PunmPUDH8L+i+k&R*7&Oef z=%MqsGdbC|%pgvm{NSETwZFzHU=^L|sTsH6Yp!dI%RY#~wJ!6ma11k@Eo$m^0-d5b z9>*?Wrvxn?6s_AS)n=byp8sR9q(bc(C`evrSpVEeD~hmc9wS`4kdeIHD)*EGRBCM^ zx}$2?vH7OStK7SBI*k6)fP-r3<&#c{;rXY-BxjODamK~xEbE_4YlWjKv$DS18`mGP z8{LNsJj=~m+nGe6p)>Ngn9zm1UQ=9D3csqN+qX8}%G+~$?(fM5@}2lI|9JsrS5pH&}GIR<1#5{LNcm9t@t=L?18%u-q ze7f4+-pkI%ecu5##i!qJqXWSgwnh4wtLHqq&fY5+m?;g*t795V{NI)K4jC8t%Erh3 zYhfV^izEBx*=&3I(**Ig2!rCvL26|4buPGB_KZ!Iz9ksC)NkmCV>J^~x7$CQac=wX zH#dDr&FS#`+h0bHnx3}A;nB?@p-P*u%Lbx9-+g<2T5_W5bJ@DX_XMGNQ+VR&lh6-% zAW2LT?J_Rj*)|DQ%v`UDom0SeO=kX18oB9W1J^t&uJuh5V07lLAoNdMn*QRmNHsGY z+7bmYI+etRJbcqy{$w~t5!z6u@Ox8r@be?g|6SY;^K{Jwakj{y4c)B5%wh+Z+!HzX zpjmnZ`S)GUT&beJj8PaU$Z4Z{3)y}^*l&kQ=76(+PMUq)gA8)Y+Quc5W%7umL#^-z z~ww#{GHLFlY)XpT|R{WiLS$Kt9b{Q;A{QNDG#Fl4xSu5-&aq{Sn9mjC+_k$QT=&RMXX z88vsfS%=bPj&zmXWTE1YV=LSnVU=hbH=DZ^*tQ@fZLs0kys0b`gKjgfcI+Qy;8@kR zxXRFFUu4Wo$4^_wsr4HrpJ~1wGeG>8a~n+_4H5As&#Qm$s?jrYj64e4Y9ZXzu8A{m zw!-RFS(kUQojojWCU>iiF=JhJjV-G3_|(cfdJ-tr+5wAO>TY)hr6tEMb!vrwQl3eY zUSD*i^1ZGnxgYdP!Wsp`^$_d4uzF77B?tMlmx0>VPg-FHi)LeTZqJeE#e(tKZ3`gx z%rn`TdPpNRD-|8z*!O0|3Ew;i`HwFHv74P+i`XaWsR`UbxW8ZNMLh=*v%8y@y32`4aY0wTE{qtxZxrgf~ ztci0J|Ne53n?6ibVZ58W+{Vnt+caGfa>jFAt2wuB;oy4)Hm4DVG0j)UJsy{*o?j|0 zZ;-#}6?=el2UpmhH5>dgI2ZcpFTZuI<(!Z4E${D*CtrBE$hnseoo}_T`ele^+0X;Z zsjamr#}?UEcf4ZlmIXJ$s_c)?KdE#o%1C+Oujjm412>SWa<95ML7eas%;lV{Th}8$ zZbfZG7PQ1`%M#OD`)(Ti{QxFY#12k(EDO)Oe=ajz&mxur$LOwWlgrGTUmw^w-EsB$ z3F5++gIyQ(GTYW#mCf5P+Gl+#*z=O{+vd31{+}$P5$c!V6Z(gr{Wp4It1&d&esQMx zYbm_;nI3sbrltQ(lkh!RMjx~>o#vC9o1VU|(^?%|PWsV86Z=2opOarYg0@3=#BmTb!q2%@flG6@R{?R=4ZAxSqPfU zgd-EU3sk)nv4sBCRijFJsxb(bfAxh|<}}Q|;R7M4+p2$Y+0Drk%J)(GaK+2ud zt8e;u*-+K-lU-c&IVW593|)o`WG2gy>A9CB%(^|P`pedeYAlnb9yTt1q?ObKyrdqq z-=UIp*Xag{29EI90^2z^qQJj@AFiH?xru)&KMwQt@h-Ca#52w#Cu-lXM%UB#-bhN& zX*y?EykSB9*@PY2{3GwQRzTC)!(GC>Oh>U)`T6g|*^d>hI_Q>3MUKTHwd-pK?mJWz z&U!<$CuP|WzC|9>UH+Yw7g?H=B8=+c)a&8VdoD53w|ws`2~An{SH(7e_nQ~~P~Fe- zn3rmox8i2X;lI)fved(zUn?AZZMtp$+a=%cl6BC?roQ!P{yW(m{`+vu#P?6U>;2{b E13MvD<^TWy literal 0 HcmV?d00001 diff --git a/tests/tests/wgpu-gpu/mesh_shader/mod.rs b/tests/tests/wgpu-gpu/mesh_shader/mod.rs index 8a3218970f0..2288c4089d8 100644 --- a/tests/tests/wgpu-gpu/mesh_shader/mod.rs +++ b/tests/tests/wgpu-gpu/mesh_shader/mod.rs @@ -98,6 +98,18 @@ fn compile_hlsl( } } +fn compile_msl(device: &wgpu::Device, entry: &str) -> wgpu::ShaderModule { + unsafe { + device.create_shader_module_passthrough(wgpu::ShaderModuleDescriptorPassthrough { + entry_point: entry.to_owned(), + label: None, + msl: Some(std::borrow::Cow::Borrowed(include_str!("shader.metal"))), + num_workgroups: (1, 1, 1), + ..Default::default() + }) + } +} + fn get_shaders( device: &wgpu::Device, backend: wgpu::Backend, @@ -114,8 +126,8 @@ fn get_shaders( // (In the case that the platform does support mesh shaders, the dummy // shader is used to avoid requiring EXPERIMENTAL_PASSTHROUGH_SHADERS.) let dummy_shader = device.create_shader_module(wgpu::include_wgsl!("non_mesh.wgsl")); - if backend == wgpu::Backend::Vulkan { - ( + match backend { + wgpu::Backend::Vulkan => ( info.use_task.then(|| compile_glsl(device, "task")), if info.use_mesh { compile_glsl(device, "mesh") @@ -123,9 +135,8 @@ fn get_shaders( dummy_shader }, info.use_frag.then(|| compile_glsl(device, "frag")), - ) - } else if backend == wgpu::Backend::Dx12 { - ( + ), + wgpu::Backend::Dx12 => ( info.use_task .then(|| compile_hlsl(device, "Task", "as", test_name)), if info.use_mesh { @@ -135,11 +146,21 @@ fn get_shaders( }, info.use_frag .then(|| compile_hlsl(device, "Frag", "ps", test_name)), - ) - } else { - assert!(!MESH_SHADER_BACKENDS.contains(Backends::from(backend))); - assert!(!info.use_task && !info.use_mesh && !info.use_frag); - (None, dummy_shader, None) + ), + wgpu::Backend::Metal => ( + info.use_task.then(|| compile_msl(device, "taskShader")), + if info.use_mesh { + compile_msl(device, "meshShader") + } else { + dummy_shader + }, + info.use_frag.then(|| compile_msl(device, "fragShader")), + ), + _ => { + assert!(!MESH_SHADER_BACKENDS.contains(Backends::from(backend))); + assert!(!info.use_task && !info.use_mesh && !info.use_frag); + (None, dummy_shader, None) + } } } diff --git a/tests/tests/wgpu-gpu/mesh_shader/shader.metal b/tests/tests/wgpu-gpu/mesh_shader/shader.metal new file mode 100644 index 00000000000..4c7da503832 --- /dev/null +++ b/tests/tests/wgpu-gpu/mesh_shader/shader.metal @@ -0,0 +1,77 @@ +using namespace metal; + +struct OutVertex { + float4 Position [[position]]; + float4 Color [[user(locn0)]]; +}; + +struct OutPrimitive { + float4 ColorMask [[flat]] [[user(locn1)]]; + bool CullPrimitive [[primitive_culled]]; +}; + +struct InVertex { +}; + +struct InPrimitive { + float4 ColorMask [[flat]] [[user(locn1)]]; +}; + +struct FragmentIn { + float4 Color [[user(locn0)]]; + float4 ColorMask [[flat]] [[user(locn1)]]; +}; + +struct PayloadData { + float4 ColorMask; + bool Visible; +}; + +using Meshlet = metal::mesh; + + +constant float4 positions[3] = { + float4(0.0, 1.0, 0.0, 1.0), + float4(-1.0, -1.0, 0.0, 1.0), + float4(1.0, -1.0, 0.0, 1.0) +}; + +constant float4 colors[3] = { + float4(0.0, 1.0, 0.0, 1.0), + float4(0.0, 0.0, 1.0, 1.0), + float4(1.0, 0.0, 0.0, 1.0) +}; + + +[[object]] +void taskShader(uint3 tid [[thread_position_in_grid]], object_data PayloadData &outPayload [[payload]], mesh_grid_properties grid) { + outPayload.ColorMask = float4(1.0, 1.0, 0.0, 1.0); + outPayload.Visible = true; + grid.set_threadgroups_per_grid(uint3(3, 1, 1)); +} + +[[mesh]] +void meshShader( + object_data PayloadData const& payload [[payload]], + Meshlet out +) +{ + out.set_primitive_count(1); + + for(int i = 0;i < 3;i++) { + OutVertex vert; + vert.Position = positions[i]; + vert.Color = colors[i] * payload.ColorMask; + out.set_vertex(i, vert); + out.set_index(i, i); + } + + OutPrimitive prim; + prim.ColorMask = float4(1.0, 0.0, 0.0, 1.0); + prim.CullPrimitive = !payload.Visible; + out.set_primitive(0, prim); +} + +fragment float4 fragShader(FragmentIn data [[stage_in]]) { + return data.Color * data.ColorMask; +} diff --git a/wgpu-types/src/features.rs b/wgpu-types/src/features.rs index c36a16c35ea..1741b7a14b1 100644 --- a/wgpu-types/src/features.rs +++ b/wgpu-types/src/features.rs @@ -1166,12 +1166,11 @@ bitflags_array! { /// This is a native only feature. const UNIFORM_BUFFER_BINDING_ARRAYS = 1 << 47; - /// Enables mesh shaders and task shaders in mesh shader pipelines. + /// Enables mesh shaders and task shaders in mesh shader pipelines. This extension does NOT imply support for + /// compiling mesh shaders at runtime. Rather, the user must use custom passthrough shaders. /// /// Supported platforms: /// - Vulkan (with [VK_EXT_mesh_shader](https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_mesh_shader.html)) - /// - /// Potential Platforms: /// - DX12 /// - Metal /// diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 10545bc560d..f91c6547649 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1045,7 +1045,7 @@ impl Limits { #[must_use] pub const fn using_recommended_minimum_mesh_shader_values(self) -> Self { Self { - // I believe this is a common limit for apple devices. I'm not entirely sure why. + // This is a common limit for apple devices. It's not immediately clear why. max_task_workgroup_total_count: 1024, max_task_workgroups_per_dimension: 1024, // llvmpipe reports 0 multiview count, which just means no multiview is allowed From 9f5e3ff2fe5d9dc0c42539ee87ad55fb777ddbf2 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Wed, 29 Oct 2025 14:06:02 -0500 Subject: [PATCH 27/35] Made tests actually run on metal --- tests/tests/wgpu-gpu/mesh_shader/mod.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/tests/wgpu-gpu/mesh_shader/mod.rs b/tests/tests/wgpu-gpu/mesh_shader/mod.rs index 2288c4089d8..1b79770b254 100644 --- a/tests/tests/wgpu-gpu/mesh_shader/mod.rs +++ b/tests/tests/wgpu-gpu/mesh_shader/mod.rs @@ -3,15 +3,11 @@ use std::{ process::Stdio, }; -use wgpu::{util::DeviceExt, Backends}; +use wgpu::util::DeviceExt; use wgpu_test::{ - fail, gpu_test, FailureCase, GpuTestConfiguration, GpuTestInitializer, TestParameters, - TestingContext, + fail, gpu_test, GpuTestConfiguration, GpuTestInitializer, TestParameters, TestingContext, }; -/// Backends that support mesh shaders -const MESH_SHADER_BACKENDS: Backends = Backends::DX12.union(Backends::VULKAN); - pub fn all_tests(tests: &mut Vec) { tests.extend([ MESH_PIPELINE_BASIC_MESH, @@ -157,7 +153,6 @@ fn get_shaders( info.use_frag.then(|| compile_msl(device, "fragShader")), ), _ => { - assert!(!MESH_SHADER_BACKENDS.contains(Backends::from(backend))); assert!(!info.use_task && !info.use_mesh && !info.use_frag); (None, dummy_shader, None) } @@ -396,7 +391,6 @@ fn mesh_draw(ctx: &TestingContext, draw_type: DrawType) { fn default_gpu_test_config(draw_type: DrawType) -> GpuTestConfiguration { GpuTestConfiguration::new().parameters( TestParameters::default() - .skip(FailureCase::backend(!MESH_SHADER_BACKENDS)) .test_features_limits() .features( wgpu::Features::EXPERIMENTAL_MESH_SHADER From e02e379a9dc56eeb8f5ddf203b6be1059c5afb93 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Wed, 29 Oct 2025 14:40:08 -0500 Subject: [PATCH 28/35] Tried to improve one part of the code --- wgpu-hal/src/metal/command.rs | 263 +++++++++++++++++----------------- 1 file changed, 131 insertions(+), 132 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 46cf52716c8..bf911362e58 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -143,6 +143,127 @@ impl super::CommandEncoder { self.state.reset(); self.leave_blit(); } + + /// Updates the bindings for a single shader stage, called in `set_bind_group`. + #[expect(clippy::too_many_arguments)] + fn update_bind_group_state( + &mut self, + stage: naga::ShaderStage, + render_encoder: Option<&metal::RenderCommandEncoder>, + compute_encoder: Option<&metal::ComputeCommandEncoder>, + index_base: super::ResourceData, + bg_info: &super::BindGroupLayoutInfo, + dynamic_offsets: &[wgt::DynamicOffset], + group_index: u32, + group: &super::BindGroup, + ) { + let resource_indices = match stage { + naga::ShaderStage::Vertex => &bg_info.base_resource_indices.vs, + naga::ShaderStage::Fragment => &bg_info.base_resource_indices.fs, + naga::ShaderStage::Task => &bg_info.base_resource_indices.ts, + naga::ShaderStage::Mesh => &bg_info.base_resource_indices.ms, + naga::ShaderStage::Compute => &bg_info.base_resource_indices.cs, + }; + let buffers = match stage { + naga::ShaderStage::Vertex => group.counters.vs.buffers, + naga::ShaderStage::Fragment => group.counters.fs.buffers, + naga::ShaderStage::Task => group.counters.ts.buffers, + naga::ShaderStage::Mesh => group.counters.ms.buffers, + naga::ShaderStage::Compute => group.counters.cs.buffers, + }; + let mut changes_sizes_buffer = false; + for index in 0..buffers { + let buf = &group.buffers[(index_base.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + let a1 = (resource_indices.buffers + index) as u64; + let a2 = Some(buf.ptr.as_native()); + let a3 = offset; + match stage { + naga::ShaderStage::Vertex => render_encoder.unwrap().set_vertex_buffer(a1, a2, a3), + naga::ShaderStage::Fragment => { + render_encoder.unwrap().set_fragment_buffer(a1, a2, a3) + } + naga::ShaderStage::Task => render_encoder.unwrap().set_object_buffer(a1, a2, a3), + naga::ShaderStage::Mesh => render_encoder.unwrap().set_mesh_buffer(a1, a2, a3), + naga::ShaderStage::Compute => compute_encoder.unwrap().set_buffer(a1, a2, a3), + } + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(stage, &mut self.temp.binding_sizes) + { + let a1 = index as _; + let a2 = (sizes.len() * WORD_SIZE) as u64; + let a3 = sizes.as_ptr().cast(); + match stage { + naga::ShaderStage::Vertex => { + render_encoder.unwrap().set_vertex_bytes(a1, a2, a3) + } + naga::ShaderStage::Fragment => { + render_encoder.unwrap().set_fragment_bytes(a1, a2, a3) + } + naga::ShaderStage::Task => render_encoder.unwrap().set_object_bytes(a1, a2, a3), + naga::ShaderStage::Mesh => render_encoder.unwrap().set_mesh_bytes(a1, a2, a3), + naga::ShaderStage::Compute => compute_encoder.unwrap().set_bytes(a1, a2, a3), + } + } + } + let samplers = match stage { + naga::ShaderStage::Vertex => group.counters.vs.samplers, + naga::ShaderStage::Fragment => group.counters.fs.samplers, + naga::ShaderStage::Task => group.counters.ts.samplers, + naga::ShaderStage::Mesh => group.counters.ms.samplers, + naga::ShaderStage::Compute => group.counters.cs.samplers, + }; + for index in 0..samplers { + let res = group.samplers[(index_base.samplers + index) as usize]; + let a1 = (resource_indices.samplers + index) as u64; + let a2 = Some(res.as_native()); + match stage { + naga::ShaderStage::Vertex => { + render_encoder.unwrap().set_vertex_sampler_state(a1, a2) + } + naga::ShaderStage::Fragment => { + render_encoder.unwrap().set_fragment_sampler_state(a1, a2) + } + naga::ShaderStage::Task => render_encoder.unwrap().set_object_sampler_state(a1, a2), + naga::ShaderStage::Mesh => render_encoder.unwrap().set_mesh_sampler_state(a1, a2), + naga::ShaderStage::Compute => compute_encoder.unwrap().set_sampler_state(a1, a2), + } + } + + let textures = match stage { + naga::ShaderStage::Vertex => group.counters.vs.textures, + naga::ShaderStage::Fragment => group.counters.fs.textures, + naga::ShaderStage::Task => group.counters.ts.textures, + naga::ShaderStage::Mesh => group.counters.ms.textures, + naga::ShaderStage::Compute => group.counters.cs.textures, + }; + for index in 0..textures { + let res = group.textures[(index_base.textures + index) as usize]; + let a1 = (resource_indices.textures + index) as u64; + let a2 = Some(res.as_native()); + match stage { + naga::ShaderStage::Vertex => render_encoder.unwrap().set_vertex_texture(a1, a2), + naga::ShaderStage::Fragment => render_encoder.unwrap().set_fragment_texture(a1, a2), + naga::ShaderStage::Task => render_encoder.unwrap().set_object_texture(a1, a2), + naga::ShaderStage::Mesh => render_encoder.unwrap().set_mesh_texture(a1, a2), + naga::ShaderStage::Compute => compute_encoder.unwrap().set_texture(a1, a2), + } + } + } } impl super::CommandState { @@ -683,138 +804,16 @@ impl crate::CommandEncoder for super::CommandEncoder { render_encoder: Option<&metal::RenderCommandEncoder>, compute_encoder: Option<&metal::ComputeCommandEncoder>, index_base: super::ResourceData| { - let resource_indices = match stage { - naga::ShaderStage::Vertex => &bg_info.base_resource_indices.vs, - naga::ShaderStage::Fragment => &bg_info.base_resource_indices.fs, - naga::ShaderStage::Task => &bg_info.base_resource_indices.ts, - naga::ShaderStage::Mesh => &bg_info.base_resource_indices.ms, - naga::ShaderStage::Compute => &bg_info.base_resource_indices.cs, - }; - let buffers = match stage { - naga::ShaderStage::Vertex => group.counters.vs.buffers, - naga::ShaderStage::Fragment => group.counters.fs.buffers, - naga::ShaderStage::Task => group.counters.ts.buffers, - naga::ShaderStage::Mesh => group.counters.ms.buffers, - naga::ShaderStage::Compute => group.counters.cs.buffers, - }; - let mut changes_sizes_buffer = false; - for index in 0..buffers { - let buf = &group.buffers[(index_base.buffers + index) as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - let a1 = (resource_indices.buffers + index) as u64; - let a2 = Some(buf.ptr.as_native()); - let a3 = offset; - match stage { - naga::ShaderStage::Vertex => { - render_encoder.unwrap().set_vertex_buffer(a1, a2, a3) - } - naga::ShaderStage::Fragment => { - render_encoder.unwrap().set_fragment_buffer(a1, a2, a3) - } - naga::ShaderStage::Task => { - render_encoder.unwrap().set_object_buffer(a1, a2, a3) - } - naga::ShaderStage::Mesh => { - render_encoder.unwrap().set_mesh_buffer(a1, a2, a3) - } - naga::ShaderStage::Compute => { - compute_encoder.unwrap().set_buffer(a1, a2, a3) - } - } - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; - } - } - if changes_sizes_buffer { - if let Some((index, sizes)) = self - .state - .make_sizes_buffer_update(stage, &mut self.temp.binding_sizes) - { - let a1 = index as _; - let a2 = (sizes.len() * WORD_SIZE) as u64; - let a3 = sizes.as_ptr().cast(); - match stage { - naga::ShaderStage::Vertex => { - render_encoder.unwrap().set_vertex_bytes(a1, a2, a3) - } - naga::ShaderStage::Fragment => { - render_encoder.unwrap().set_fragment_bytes(a1, a2, a3) - } - naga::ShaderStage::Task => { - render_encoder.unwrap().set_object_bytes(a1, a2, a3) - } - naga::ShaderStage::Mesh => { - render_encoder.unwrap().set_mesh_bytes(a1, a2, a3) - } - naga::ShaderStage::Compute => { - compute_encoder.unwrap().set_bytes(a1, a2, a3) - } - } - } - } - let samplers = match stage { - naga::ShaderStage::Vertex => group.counters.vs.samplers, - naga::ShaderStage::Fragment => group.counters.fs.samplers, - naga::ShaderStage::Task => group.counters.ts.samplers, - naga::ShaderStage::Mesh => group.counters.ms.samplers, - naga::ShaderStage::Compute => group.counters.cs.samplers, - }; - for index in 0..samplers { - let res = group.samplers[(index_base.samplers + index) as usize]; - let a1 = (resource_indices.samplers + index) as u64; - let a2 = Some(res.as_native()); - match stage { - naga::ShaderStage::Vertex => { - render_encoder.unwrap().set_vertex_sampler_state(a1, a2) - } - naga::ShaderStage::Fragment => { - render_encoder.unwrap().set_fragment_sampler_state(a1, a2) - } - naga::ShaderStage::Task => { - render_encoder.unwrap().set_object_sampler_state(a1, a2) - } - naga::ShaderStage::Mesh => { - render_encoder.unwrap().set_mesh_sampler_state(a1, a2) - } - naga::ShaderStage::Compute => { - compute_encoder.unwrap().set_sampler_state(a1, a2) - } - } - } - - let textures = match stage { - naga::ShaderStage::Vertex => group.counters.vs.textures, - naga::ShaderStage::Fragment => group.counters.fs.textures, - naga::ShaderStage::Task => group.counters.ts.textures, - naga::ShaderStage::Mesh => group.counters.ms.textures, - naga::ShaderStage::Compute => group.counters.cs.textures, - }; - for index in 0..textures { - let res = group.textures[(index_base.textures + index) as usize]; - let a1 = (resource_indices.textures + index) as u64; - let a2 = Some(res.as_native()); - match stage { - naga::ShaderStage::Vertex => { - render_encoder.unwrap().set_vertex_texture(a1, a2) - } - naga::ShaderStage::Fragment => { - render_encoder.unwrap().set_fragment_texture(a1, a2) - } - naga::ShaderStage::Task => { - render_encoder.unwrap().set_object_texture(a1, a2) - } - naga::ShaderStage::Mesh => render_encoder.unwrap().set_mesh_texture(a1, a2), - naga::ShaderStage::Compute => compute_encoder.unwrap().set_texture(a1, a2), - } - } + self.update_bind_group_state( + stage, + render_encoder, + compute_encoder, + index_base, + bg_info, + dynamic_offsets, + group_index, + group, + ); }; if let Some(encoder) = render_encoder { update_stage( From 6937fa26e63348ca0935daa83f7d6ce7ae0675e2 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Wed, 29 Oct 2025 15:02:56 -0500 Subject: [PATCH 29/35] Updated feature check to hopefully fix CI --- wgpu-hal/src/metal/adapter.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index e3cfb8d134a..5efd393ac2d 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -902,8 +902,10 @@ impl super::PrivateCapabilities { && (device.supports_family(MTLGPUFamily::Apple7) || device.supports_family(MTLGPUFamily::Mac2)), supports_shared_event: version.at_least((10, 14), (12, 0), os_is_mac), - mesh_shaders: device.supports_family(MTLGPUFamily::Apple7) - || device.supports_family(MTLGPUFamily::Mac2), + mesh_shaders: family_check + && device.supports_family(MTLGPUFamily::Metal3) + && (device.supports_family(MTLGPUFamily::Apple7) + || device.supports_family(MTLGPUFamily::Mac2)), shader_barycentrics: device.supports_shader_barycentric_coordinates(), // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=3 supports_memoryless_storage: if family_check { From 8bbcea0a26f6c32060ffb910aad646f4bbcfbed0 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Wed, 29 Oct 2025 22:42:46 -0500 Subject: [PATCH 30/35] Smartified mesh shader detection --- wgpu-hal/src/metal/adapter.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 5efd393ac2d..8b460d26472 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -607,6 +607,9 @@ impl super::PrivateCapabilities { let argument_buffers = device.argument_buffers_support(); + // Lmao + let is_virtual = device.name().to_lowercase().contains("virtual"); + Self { family_check, msl_version: if os_is_xr || version.at_least((14, 0), (17, 0), os_is_mac) { @@ -903,9 +906,11 @@ impl super::PrivateCapabilities { || device.supports_family(MTLGPUFamily::Mac2)), supports_shared_event: version.at_least((10, 14), (12, 0), os_is_mac), mesh_shaders: family_check - && device.supports_family(MTLGPUFamily::Metal3) - && (device.supports_family(MTLGPUFamily::Apple7) - || device.supports_family(MTLGPUFamily::Mac2)), + && (device.supports_family(MTLGPUFamily::Metal3) + || device.supports_family(MTLGPUFamily::Apple7) + || device.supports_family(MTLGPUFamily::Mac2)) + // Mesh shaders don't work on virtual devices even if they should be supported. + && !is_virtual, shader_barycentrics: device.supports_shader_barycentric_coordinates(), // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=3 supports_memoryless_storage: if family_check { From b4abddd88fe9c663a903a7521971d5d5076415d1 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Wed, 29 Oct 2025 22:57:03 -0500 Subject: [PATCH 31/35] Nicified some stuff --- wgpu-hal/src/metal/device.rs | 141 +++++++++++++++++++---------------- 1 file changed, 78 insertions(+), 63 deletions(-) diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index a4cd1341183..0c5de7c9ec2 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1125,53 +1125,61 @@ impl crate::Device for super::Device { let vs_info; let ts_info; let ms_info; + + // Create the pipeline descriptor and do vertex/mesh pipeline specific setup let descriptor = match desc.vertex_processor { crate::VertexProcessor::Standard { vertex_buffers, ref vertex_stage, } => { + // Vertex pipeline specific setup + let descriptor = metal::RenderPipelineDescriptor::new(); ts_info = None; ms_info = None; - vs_info = Some({ - let mut vertex_buffer_mappings = - Vec::::new(); - for (i, vbl) in vertex_buffers.iter().enumerate() { - let mut attributes = Vec::::new(); - for attribute in vbl.attributes.iter() { - attributes.push(naga::back::msl::AttributeMapping { - shader_location: attribute.shader_location, - offset: attribute.offset as u32, - format: convert_vertex_format_to_naga(attribute.format), - }); - } - vertex_buffer_mappings.push(naga::back::msl::VertexBufferMapping { - id: self.shared.private_caps.max_vertex_buffers - 1 - i as u32, - stride: if vbl.array_stride > 0 { - vbl.array_stride.try_into().unwrap() - } else { - vbl.attributes - .iter() - .map(|attribute| attribute.offset + attribute.format.size()) - .max() - .unwrap_or(0) - .try_into() - .unwrap() - }, - step_mode: match (vbl.array_stride == 0, vbl.step_mode) { - (true, _) => naga::back::msl::VertexBufferStepMode::Constant, - (false, wgt::VertexStepMode::Vertex) => { - naga::back::msl::VertexBufferStepMode::ByVertex - } - (false, wgt::VertexStepMode::Instance) => { - naga::back::msl::VertexBufferStepMode::ByInstance - } - }, - attributes, + // Collect vertex buffer mappings + let mut vertex_buffer_mappings = + Vec::::new(); + for (i, vbl) in vertex_buffers.iter().enumerate() { + let mut attributes = Vec::::new(); + for attribute in vbl.attributes.iter() { + attributes.push(naga::back::msl::AttributeMapping { + shader_location: attribute.shader_location, + offset: attribute.offset as u32, + format: convert_vertex_format_to_naga(attribute.format), }); } + let mapping = naga::back::msl::VertexBufferMapping { + id: self.shared.private_caps.max_vertex_buffers - 1 - i as u32, + stride: if vbl.array_stride > 0 { + vbl.array_stride.try_into().unwrap() + } else { + vbl.attributes + .iter() + .map(|attribute| attribute.offset + attribute.format.size()) + .max() + .unwrap_or(0) + .try_into() + .unwrap() + }, + step_mode: match (vbl.array_stride == 0, vbl.step_mode) { + (true, _) => naga::back::msl::VertexBufferStepMode::Constant, + (false, wgt::VertexStepMode::Vertex) => { + naga::back::msl::VertexBufferStepMode::ByVertex + } + (false, wgt::VertexStepMode::Instance) => { + naga::back::msl::VertexBufferStepMode::ByInstance + } + }, + attributes, + }; + vertex_buffer_mappings.push(mapping); + } + + // Setup vertex shader + { let vs = self.load_shader( vertex_stage, &vertex_buffer_mappings, @@ -1188,7 +1196,7 @@ impl crate::Device for super::Device { ); } - super::PipelineStageInfo { + vs_info = Some(super::PipelineStageInfo { push_constants: desc.layout.push_constants_infos.vs, sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, sized_bindings: vs.sized_bindings, @@ -1196,8 +1204,10 @@ impl crate::Device for super::Device { library: Some(vs.library), raw_wg_size: Default::default(), work_group_memory_sizes: vec![], - } - }); + }); + } + + // Validate vertex buffer count if desc.layout.total_counters.vs.buffers + (vertex_buffers.len() as u32) > self.shared.private_caps.max_vertex_buffers { @@ -1212,6 +1222,7 @@ impl crate::Device for super::Device { )); } + // Set the pipeline vertex buffer info if !vertex_buffers.is_empty() { let vertex_descriptor = metal::VertexDescriptor::new(); for (i, vb) in vertex_buffers.iter().enumerate() { @@ -1250,14 +1261,19 @@ impl crate::Device for super::Device { } descriptor.set_vertex_descriptor(Some(vertex_descriptor)); } + MetalGenericRenderPipelineDescriptor::Standard(descriptor) } crate::VertexProcessor::Mesh { ref task_stage, ref mesh_stage, } => { + // Mesh pipeline specific setup + vs_info = None; let descriptor = metal::MeshRenderPipelineDescriptor::new(); + + // Setup task stage if let Some(ref task_stage) = task_stage { let ts = self.load_shader( task_stage, @@ -1285,6 +1301,8 @@ impl crate::Device for super::Device { } else { ts_info = None; } + + // Setup mesh stage { let ms = self.load_shader( mesh_stage, @@ -1310,9 +1328,13 @@ impl crate::Device for super::Device { work_group_memory_sizes: ms.wg_memory_sizes, }); } + MetalGenericRenderPipelineDescriptor::Mesh(descriptor) } }; + + // Standard and mesh render pipeline descriptors don't inherit from the same interface, despite sharing + // many methods. This function lets us call a function by name on whichever descriptor we are using. macro_rules! descriptor_fn { ($method:ident $( ( $($args:expr),* ) )? ) => { match descriptor { @@ -1372,6 +1394,7 @@ impl crate::Device for super::Device { } }; + // Setup pipeline color attachments for (i, ct) in desc.color_targets.iter().enumerate() { let at_descriptor = descriptor_fn!(color_attachments()) .object_at(i as u64) @@ -1402,6 +1425,7 @@ impl crate::Device for super::Device { } } + // Setup depth stencil state let depth_stencil = match desc.depth_stencil { Some(ref ds) => { let raw_format = self.shared.private_caps.map_format(ds.format); @@ -1424,6 +1448,7 @@ impl crate::Device for super::Device { None => None, }; + // Setup multisample state if desc.multisample.count != 1 { //TODO: handle sample mask match descriptor { @@ -1440,36 +1465,26 @@ impl crate::Device for super::Device { //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); } + // Set debug label if let Some(name) = desc.label { descriptor_fn!(set_label(name)); } + // Create the pipeline from descriptor let raw = match descriptor { - MetalGenericRenderPipelineDescriptor::Standard(d) => self - .shared - .device - .lock() - .new_render_pipeline_state(&d) - .map_err(|e| { - crate::PipelineError::Linkage( - wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, - format!("new_render_pipeline_state: {e:?}"), - ) - })?, - MetalGenericRenderPipelineDescriptor::Mesh(d) => self - .shared - .device - .lock() - .new_mesh_render_pipeline_state(&d) - .map_err(|e| { - crate::PipelineError::Linkage( - wgt::ShaderStages::TASK - | wgt::ShaderStages::MESH - | wgt::ShaderStages::FRAGMENT, - format!("new_mesh_render_pipeline_state: {e:?}"), - ) - })?, - }; + MetalGenericRenderPipelineDescriptor::Standard(d) => { + self.shared.device.lock().new_render_pipeline_state(&d) + } + MetalGenericRenderPipelineDescriptor::Mesh(d) => { + self.shared.device.lock().new_mesh_render_pipeline_state(&d) + } + } + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, + format!("new_render_pipeline_state: {e:?}"), + ) + })?; self.counters.render_pipelines.add(1); From d8d3a612b3886e1aba619b03325d06852b76b37c Mon Sep 17 00:00:00 2001 From: Inner Daemons <85136135+inner-daemons@users.noreply.github.com> Date: Wed, 12 Nov 2025 18:54:52 -0600 Subject: [PATCH 32/35] Update wgpu-hal/src/metal/adapter.rs Co-authored-by: Connor Fitzgerald --- wgpu-hal/src/metal/adapter.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 369fd5a885c..f4e0f43e2bf 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -607,7 +607,6 @@ impl super::PrivateCapabilities { let argument_buffers = device.argument_buffers_support(); - // Lmao let is_virtual = device.name().to_lowercase().contains("virtual"); Self { From 9acc91821e3c9ad74eac026c41bbc804c5762a8e Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Wed, 12 Nov 2025 19:08:47 -0600 Subject: [PATCH 33/35] Update macro & other cleanup --- wgpu-hal/src/metal/command.rs | 2 +- wgpu-hal/src/metal/device.rs | 42 +++++++++++++++++------------------ 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index ec3089d1028..d62d7101007 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1351,7 +1351,7 @@ impl crate::CommandEncoder for super::CommandEncoder { _count_offset: wgt::BufferAddress, _max_count: u32, ) { - //TODO + unreachable!() } // compute diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 26b4e2239cb..0c407777812 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -18,11 +18,6 @@ use metal::{ type DeviceResult = Result; -enum MetalGenericRenderPipelineDescriptor { - Standard(metal::RenderPipelineDescriptor), - Mesh(metal::MeshRenderPipelineDescriptor), -} - struct CompiledShader { library: metal::Library, function: metal::Function, @@ -1119,6 +1114,11 @@ impl crate::Device for super::Device { >, ) -> Result { objc::rc::autoreleasepool(|| { + enum MetalGenericRenderPipelineDescriptor { + Standard(metal::RenderPipelineDescriptor), + Mesh(metal::MeshRenderPipelineDescriptor), + } + let (primitive_class, raw_primitive_type) = conv::map_primitive_topology(desc.primitive.topology); @@ -1336,8 +1336,8 @@ impl crate::Device for super::Device { // Standard and mesh render pipeline descriptors don't inherit from the same interface, despite sharing // many methods. This function lets us call a function by name on whichever descriptor we are using. macro_rules! descriptor_fn { - ($method:ident $( ( $($args:expr),* ) )? ) => { - match descriptor { + ($descriptor:ident . $method:ident $( ( $($args:expr),* ) )? ) => { + match $descriptor { MetalGenericRenderPipelineDescriptor::Standard(ref inner) => inner.$method$(($($args),*))?, MetalGenericRenderPipelineDescriptor::Mesh(ref inner) => inner.$method$(($($args),*))?, } @@ -1364,10 +1364,10 @@ impl crate::Device for super::Device { naga::ShaderStage::Fragment, )?; - descriptor_fn!(set_fragment_function(Some(&fs.function))); + descriptor_fn!(descriptor.set_fragment_function(Some(&fs.function))); if self.shared.private_caps.supports_mutability { Self::set_buffers_mutability( - descriptor_fn!(fragment_buffers()).unwrap(), + descriptor_fn!(descriptor.fragment_buffers()).unwrap(), fs.immutable_buffer_mask, ); } @@ -1386,9 +1386,8 @@ impl crate::Device for super::Device { // TODO: This is a workaround for what appears to be a Metal validation bug // A pixel format is required even though no attachments are provided if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { - descriptor_fn!(set_depth_attachment_pixel_format( - MTLPixelFormat::Depth32Float - )); + descriptor_fn!(descriptor + .set_depth_attachment_pixel_format(MTLPixelFormat::Depth32Float)); } None } @@ -1396,7 +1395,7 @@ impl crate::Device for super::Device { // Setup pipeline color attachments for (i, ct) in desc.color_targets.iter().enumerate() { - let at_descriptor = descriptor_fn!(color_attachments()) + let at_descriptor = descriptor_fn!(descriptor.color_attachments()) .object_at(i as u64) .unwrap(); let ct = if let Some(color_target) = ct.as_ref() { @@ -1431,10 +1430,10 @@ impl crate::Device for super::Device { let raw_format = self.shared.private_caps.map_format(ds.format); let aspects = crate::FormatAspects::from(ds.format); if aspects.contains(crate::FormatAspects::DEPTH) { - descriptor_fn!(set_depth_attachment_pixel_format(raw_format)); + descriptor_fn!(descriptor.set_depth_attachment_pixel_format(raw_format)); } if aspects.contains(crate::FormatAspects::STENCIL) { - descriptor_fn!(set_stencil_attachment_pixel_format(raw_format)); + descriptor_fn!(descriptor.set_stencil_attachment_pixel_format(raw_format)); } let ds_descriptor = create_depth_stencil_desc(ds); @@ -1459,20 +1458,19 @@ impl crate::Device for super::Device { inner.set_raster_sample_count(desc.multisample.count as u64); } } - descriptor_fn!(set_alpha_to_coverage_enabled( - desc.multisample.alpha_to_coverage_enabled - )); + descriptor_fn!(descriptor + .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled)); //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); } // Set debug label if let Some(name) = desc.label { - descriptor_fn!(set_label(name)); + descriptor_fn!(descriptor.set_label(name)); } if let Some(mv) = desc.multiview_mask { - descriptor_fn!(set_max_vertex_amplification_count( - mv.get().count_ones() as u64 - )); + descriptor_fn!( + descriptor.set_max_vertex_amplification_count(mv.get().count_ones() as u64) + ); } // Create the pipeline from descriptor From ae8cdb65b06149c16ecf57ae2bf335b5713279bb Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Wed, 12 Nov 2025 19:11:14 -0600 Subject: [PATCH 34/35] Removed closure --- wgpu-hal/src/metal/command.rs | 46 +++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index d62d7101007..86be90427d7 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -824,44 +824,40 @@ impl crate::CommandEncoder for super::CommandEncoder { let bg_info = &layout.bind_group_infos[group_index as usize]; let render_encoder = self.state.render.clone(); let compute_encoder = self.state.compute.clone(); - let mut update_stage = - |stage: naga::ShaderStage, - render_encoder: Option<&metal::RenderCommandEncoder>, - compute_encoder: Option<&metal::ComputeCommandEncoder>, - index_base: super::ResourceData| { - self.update_bind_group_state( - stage, - render_encoder, - compute_encoder, - index_base, - bg_info, - dynamic_offsets, - group_index, - group, - ); - }; if let Some(encoder) = render_encoder { - update_stage( + self.update_bind_group_state( naga::ShaderStage::Vertex, Some(&encoder), None, // All zeros, as vs comes first super::ResourceData::default(), + bg_info, + dynamic_offsets, + group_index, + group, ); - update_stage( + self.update_bind_group_state( naga::ShaderStage::Task, Some(&encoder), None, // All zeros, as ts comes first super::ResourceData::default(), + bg_info, + dynamic_offsets, + group_index, + group, ); - update_stage( + self.update_bind_group_state( naga::ShaderStage::Mesh, Some(&encoder), None, group.counters.ts.clone(), + bg_info, + dynamic_offsets, + group_index, + group, ); - update_stage( + self.update_bind_group_state( naga::ShaderStage::Fragment, Some(&encoder), None, @@ -876,6 +872,10 @@ impl crate::CommandEncoder for super::CommandEncoder { + group.counters.ts.samplers + group.counters.ms.samplers, }, + bg_info, + dynamic_offsets, + group_index, + group, ); // Call useResource on all textures and buffers used indirectly so they are alive for (resource, use_info) in group.resources_to_use.iter() { @@ -883,7 +883,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } } if let Some(encoder) = compute_encoder { - update_stage( + self.update_bind_group_state( naga::ShaderStage::Compute, None, Some(&encoder), @@ -901,6 +901,10 @@ impl crate::CommandEncoder for super::CommandEncoder { + group.counters.ms.samplers + group.counters.fs.samplers, }, + bg_info, + dynamic_offsets, + group_index, + group, ); // Call useResource on all textures and buffers used indirectly so they are alive for (resource, use_info) in group.resources_to_use.iter() { From 53682cb4a0e903eff71274a73faf51bc7251ab21 Mon Sep 17 00:00:00 2001 From: SupaMaggie70 Date: Thu, 13 Nov 2025 00:08:21 -0600 Subject: [PATCH 35/35] Updated stuff --- wgpu-hal/src/metal/device.rs | 72 +++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 0c407777812..f7bcca72515 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1118,6 +1118,42 @@ impl crate::Device for super::Device { Standard(metal::RenderPipelineDescriptor), Mesh(metal::MeshRenderPipelineDescriptor), } + macro_rules! descriptor_fn { + ($descriptor:ident . $method:ident $( ( $($args:expr),* ) )? ) => { + match $descriptor { + MetalGenericRenderPipelineDescriptor::Standard(ref inner) => inner.$method$(($($args),*))?, + MetalGenericRenderPipelineDescriptor::Mesh(ref inner) => inner.$method$(($($args),*))?, + } + }; + } + impl MetalGenericRenderPipelineDescriptor { + fn set_fragment_function(&self, function: Option<&metal::FunctionRef>) { + descriptor_fn!(self.set_fragment_function(function)); + } + fn fragment_buffers(&self) -> Option<&metal::PipelineBufferDescriptorArrayRef> { + descriptor_fn!(self.fragment_buffers()) + } + fn set_depth_attachment_pixel_format(&self, pixel_format: MTLPixelFormat) { + descriptor_fn!(self.set_depth_attachment_pixel_format(pixel_format)); + } + fn color_attachments( + &self, + ) -> &metal::RenderPipelineColorAttachmentDescriptorArrayRef { + descriptor_fn!(self.color_attachments()) + } + fn set_stencil_attachment_pixel_format(&self, pixel_format: MTLPixelFormat) { + descriptor_fn!(self.set_stencil_attachment_pixel_format(pixel_format)); + } + fn set_alpha_to_coverage_enabled(&self, enabled: bool) { + descriptor_fn!(self.set_alpha_to_coverage_enabled(enabled)); + } + fn set_label(&self, label: &str) { + descriptor_fn!(self.set_label(label)); + } + fn set_max_vertex_amplification_count(&self, count: metal::NSUInteger) { + descriptor_fn!(self.set_max_vertex_amplification_count(count)) + } + } let (primitive_class, raw_primitive_type) = conv::map_primitive_topology(desc.primitive.topology); @@ -1333,17 +1369,6 @@ impl crate::Device for super::Device { } }; - // Standard and mesh render pipeline descriptors don't inherit from the same interface, despite sharing - // many methods. This function lets us call a function by name on whichever descriptor we are using. - macro_rules! descriptor_fn { - ($descriptor:ident . $method:ident $( ( $($args:expr),* ) )? ) => { - match $descriptor { - MetalGenericRenderPipelineDescriptor::Standard(ref inner) => inner.$method$(($($args),*))?, - MetalGenericRenderPipelineDescriptor::Mesh(ref inner) => inner.$method$(($($args),*))?, - } - }; - } - let raw_triangle_fill_mode = match desc.primitive.polygon_mode { wgt::PolygonMode::Fill => MTLTriangleFillMode::Fill, wgt::PolygonMode::Line => MTLTriangleFillMode::Lines, @@ -1364,10 +1389,10 @@ impl crate::Device for super::Device { naga::ShaderStage::Fragment, )?; - descriptor_fn!(descriptor.set_fragment_function(Some(&fs.function))); + descriptor.set_fragment_function(Some(&fs.function)); if self.shared.private_caps.supports_mutability { Self::set_buffers_mutability( - descriptor_fn!(descriptor.fragment_buffers()).unwrap(), + descriptor.fragment_buffers().unwrap(), fs.immutable_buffer_mask, ); } @@ -1386,8 +1411,7 @@ impl crate::Device for super::Device { // TODO: This is a workaround for what appears to be a Metal validation bug // A pixel format is required even though no attachments are provided if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { - descriptor_fn!(descriptor - .set_depth_attachment_pixel_format(MTLPixelFormat::Depth32Float)); + descriptor.set_depth_attachment_pixel_format(MTLPixelFormat::Depth32Float); } None } @@ -1395,9 +1419,7 @@ impl crate::Device for super::Device { // Setup pipeline color attachments for (i, ct) in desc.color_targets.iter().enumerate() { - let at_descriptor = descriptor_fn!(descriptor.color_attachments()) - .object_at(i as u64) - .unwrap(); + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); let ct = if let Some(color_target) = ct.as_ref() { color_target } else { @@ -1430,10 +1452,10 @@ impl crate::Device for super::Device { let raw_format = self.shared.private_caps.map_format(ds.format); let aspects = crate::FormatAspects::from(ds.format); if aspects.contains(crate::FormatAspects::DEPTH) { - descriptor_fn!(descriptor.set_depth_attachment_pixel_format(raw_format)); + descriptor.set_depth_attachment_pixel_format(raw_format); } if aspects.contains(crate::FormatAspects::STENCIL) { - descriptor_fn!(descriptor.set_stencil_attachment_pixel_format(raw_format)); + descriptor.set_stencil_attachment_pixel_format(raw_format); } let ds_descriptor = create_depth_stencil_desc(ds); @@ -1458,19 +1480,17 @@ impl crate::Device for super::Device { inner.set_raster_sample_count(desc.multisample.count as u64); } } - descriptor_fn!(descriptor - .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled)); + descriptor + .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled); //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); } // Set debug label if let Some(name) = desc.label { - descriptor_fn!(descriptor.set_label(name)); + descriptor.set_label(name); } if let Some(mv) = desc.multiview_mask { - descriptor_fn!( - descriptor.set_max_vertex_amplification_count(mv.get().count_ones() as u64) - ); + descriptor.set_max_vertex_amplification_count(mv.get().count_ones() as u64); } // Create the pipeline from descriptor