Skip to content

Commit

Permalink
Merge pull request #12186 from TellowKrinkle/MultiTextureComputeMetal
Browse files Browse the repository at this point in the history
VideoBackends:Metal: Support multiple compute textures
  • Loading branch information
lioncash committed Nov 30, 2023
2 parents 163acb5 + 394dd02 commit d67f54b
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 45 deletions.
6 changes: 4 additions & 2 deletions Source/Core/VideoBackends/Metal/MTLGfx.mm
Expand Up @@ -386,9 +386,11 @@
g_state_tracker->SetSampler(index, state);
}

void Metal::Gfx::SetComputeImageTexture(u32, AbstractTexture* texture, bool read, bool write)
void Metal::Gfx::SetComputeImageTexture(u32 index, AbstractTexture* texture, bool read, bool write)
{
g_state_tracker->SetComputeTexture(static_cast<const Texture*>(texture));
g_state_tracker->SetTexture(index + VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS,
texture ? static_cast<const Texture*>(texture)->GetMTLTexture() :
nullptr);
}

void Metal::Gfx::UnbindTexture(const AbstractTexture* texture)
Expand Down
4 changes: 3 additions & 1 deletion Source/Core/VideoBackends/Metal/MTLPipeline.h
Expand Up @@ -61,12 +61,14 @@ class ComputePipeline : public Shader
MRCOwned<id<MTLComputePipelineState>> pipeline);

id<MTLComputePipelineState> GetComputePipeline() const { return m_compute_pipeline; }
bool UsesTexture(u32 index) const { return m_textures & (1 << index); }
u32 GetTextures() const { return m_textures; }
u32 GetSamplers() const { return m_samplers; }
bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); }

private:
MRCOwned<id<MTLComputePipelineState>> m_compute_pipeline;
u32 m_textures = 0;
u32 m_samplers = 0;
u32 m_buffers = 0;
};
} // namespace Metal
2 changes: 1 addition & 1 deletion Source/Core/VideoBackends/Metal/MTLPipeline.mm
Expand Up @@ -67,5 +67,5 @@ static void GetArguments(NSArray<MTLArgument*>* arguments, u32* textures, u32* s
MRCOwned<id<MTLComputePipelineState>> pipeline)
: Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline))
{
GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers);
GetArguments([reflection arguments], &m_textures, &m_samplers, &m_buffers);
}
25 changes: 15 additions & 10 deletions Source/Core/VideoBackends/Metal/MTLStateTracker.h
Expand Up @@ -90,7 +90,6 @@ class StateTracker
void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth);
void SetTexture(u32 idx, id<MTLTexture> texture);
void SetSampler(u32 idx, const SamplerState& sampler);
void SetComputeTexture(const Texture* texture);
void InvalidateUniforms(bool vertex, bool geometry, bool fragment);
void SetUtilityUniform(const void* buffer, size_t size);
void SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1);
Expand Down Expand Up @@ -191,11 +190,18 @@ class StateTracker

MRCOwned<id<MTLTexture>> m_dummy_texture;

// Compute has a set of samplers and a set of writable images
static constexpr u32 MAX_COMPUTE_TEXTURES = VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS * 2;
static constexpr u32 MAX_PIXEL_TEXTURES = VideoCommon::MAX_PIXEL_SHADER_SAMPLERS;
static constexpr u32 MAX_TEXTURES = std::max(MAX_PIXEL_TEXTURES, MAX_COMPUTE_TEXTURES);
static constexpr u32 MAX_SAMPLERS =
std::max(VideoCommon::MAX_PIXEL_SHADER_SAMPLERS, VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS);

// MARK: State
u8 m_dirty_textures;
u16 m_dirty_textures;
u8 m_dirty_samplers;
static_assert(sizeof(m_dirty_textures) * 8 >= VideoCommon::MAX_PIXEL_SHADER_SAMPLERS,
"Make these bigger");
static_assert(sizeof(m_dirty_textures) * 8 >= MAX_TEXTURES, "Make this bigger");
static_assert(sizeof(m_dirty_samplers) * 8 >= MAX_SAMPLERS, "Make this bigger");
union Flags
{
struct
Expand All @@ -206,7 +212,6 @@ class StateTracker
bool has_gx_ps_uniform : 1;
bool has_utility_vs_uniform : 1;
bool has_utility_ps_uniform : 1;
bool has_compute_texture : 1;
bool has_pipeline : 1;
bool has_scissor : 1;
bool has_viewport : 1;
Expand Down Expand Up @@ -251,11 +256,11 @@ class StateTracker
Util::Viewport viewport;
const Pipeline* render_pipeline = nullptr;
const ComputePipeline* compute_pipeline = nullptr;
std::array<id<MTLTexture>, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> textures = {};
std::array<id<MTLSamplerState>, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> samplers = {};
std::array<float, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_min_lod;
std::array<float, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_max_lod;
std::array<SamplerState, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_states;
std::array<id<MTLTexture>, MAX_TEXTURES> textures = {};
std::array<id<MTLSamplerState>, MAX_SAMPLERS> samplers = {};
std::array<float, MAX_SAMPLERS> sampler_min_lod;
std::array<float, MAX_SAMPLERS> sampler_max_lod;
std::array<SamplerState, MAX_SAMPLERS> sampler_states;
const Texture* compute_texture = nullptr;
std::unique_ptr<u8[]> utility_uniform;
u32 utility_uniform_size = 0;
Expand Down
54 changes: 35 additions & 19 deletions Source/Core/VideoBackends/Metal/MTLStateTracker.mm
Expand Up @@ -7,6 +7,7 @@
#include <bit>
#include <mutex>

#include "Common/Align.h"
#include "Common/Assert.h"

#include "Core/System.h"
Expand Down Expand Up @@ -344,8 +345,8 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
m_current.cull_mode = MTLCullModeNone;
m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
m_flags.NewEncoder();
m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
m_dirty_textures = (1 << MAX_TEXTURES) - 1;
CheckScissor();
CheckViewport();
ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!");
Expand All @@ -359,8 +360,8 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
if (m_manual_buffer_upload)
[m_current_compute_encoder waitForFence:m_fence];
m_flags.NewEncoder();
m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
m_dirty_textures = (1 << MAX_TEXTURES) - 1;
}

void Metal::StateTracker::EndRenderPass()
Expand Down Expand Up @@ -535,15 +536,6 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
SetSamplerForce(idx, sampler);
}

// Stores `texture` as the single compute texture in m_state.
// The has_compute_texture flag is cleared only when the pointer actually changes; the
// compute path tests that flag before issuing setTexture:atIndex:0, so an unchanged
// texture does not trigger another bind on the current encoder.
void Metal::StateTracker::SetComputeTexture(const Texture* texture)
{
if (m_state.compute_texture != texture)
{
m_state.compute_texture = texture;
// Mark the binding stale so the next compute dispatch re-binds it.
m_flags.has_compute_texture = false;
}
}

void Metal::StateTracker::UnbindTexture(id<MTLTexture> texture)
{
for (size_t i = 0; i < std::size(m_state.textures); ++i)
Expand All @@ -565,12 +557,15 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}

void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size)
{
// Shader often uses 16-byte aligned types
// Metal validation will complain if our upload is smaller than the struct with padding
size_t aligned_size = Common::AlignUp(size, 16);
if (m_state.utility_uniform_capacity < size)
{
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[size]);
m_state.utility_uniform_capacity = size;
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[aligned_size]);
m_state.utility_uniform_capacity = static_cast<u32>(aligned_size);
}
m_state.utility_uniform_size = size;
m_state.utility_uniform_size = static_cast<u32>(aligned_size);
memcpy(m_state.utility_uniform.get(), buffer, size);
m_flags.has_utility_vs_uniform = false;
m_flags.has_utility_ps_uniform = false;
Expand Down Expand Up @@ -893,10 +888,31 @@ static NSRange RangeOfBits(u32 value)
m_flags.has_pipeline = true;
[enc setComputePipelineState:pipe->GetComputePipeline()];
}
if (!m_flags.has_compute_texture && pipe->UsesTexture(0))
if (u32 dirty = m_dirty_textures & pipe->GetTextures())
{
m_dirty_textures &= ~pipe->GetTextures();
// Since there are two sets of textures, it's likely there'll be a few in each
// Check each set separately to avoid doing too many unnecessary bindings
constexpr u32 lo_mask = (1 << VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS) - 1;
if (u32 lo = dirty & lo_mask)
{
NSRange range = RangeOfBits(lo);
[enc setTextures:&m_state.textures[range.location] withRange:range];
}
if (u32 hi = dirty & ~lo_mask)
{
NSRange range = RangeOfBits(hi);
[enc setTextures:&m_state.textures[range.location] withRange:range];
}
}
if (u32 dirty = m_dirty_samplers & pipe->GetSamplers())
{
m_flags.has_compute_texture = true;
[enc setTexture:m_state.compute_texture->GetMTLTexture() atIndex:0];
m_dirty_samplers &= ~pipe->GetSamplers();
NSRange range = RangeOfBits(dirty);
[enc setSamplerStates:&m_state.samplers[range.location]
lodMinClamps:&m_state.sampler_min_lod[range.location]
lodMaxClamps:&m_state.sampler_max_lod[range.location]
withRange:range];
}
// Compute and render can't happen at the same time, so just reuse one of the flags
if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0))
Expand Down
41 changes: 29 additions & 12 deletions Source/Core/VideoBackends/Metal/MTLUtil.mm
Expand Up @@ -11,6 +11,7 @@

#include "Common/MsgHandler.h"

#include "VideoCommon/Constants.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/Spirv.h"

Expand Down Expand Up @@ -365,10 +366,10 @@ fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) {
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
// All resources are packed into one descriptor set for compute.
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
#define SAMPLER_BINDING(x) layout(set = 1, binding = x)
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
#define IMAGE_BINDING(format, x) layout(format, set = 1, binding = x)
#define IMAGE_BINDING(format, x) layout(format, set = 3, binding = x)
// hlsl to glsl function translation
#define API_METAL 1
Expand Down Expand Up @@ -462,20 +463,13 @@ fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) {
MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo
MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // ps/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // ps/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0
MakeResourceBinding(spv::ExecutionModelFragment, 1, 1, 0, 1, 1), // ps/samp1
MakeResourceBinding(spv::ExecutionModelFragment, 1, 2, 0, 2, 2), // ps/samp2
MakeResourceBinding(spv::ExecutionModelFragment, 1, 3, 0, 3, 3), // ps/samp3
MakeResourceBinding(spv::ExecutionModelFragment, 1, 4, 0, 4, 4), // ps/samp4
MakeResourceBinding(spv::ExecutionModelFragment, 1, 5, 0, 5, 5), // ps/samp5
MakeResourceBinding(spv::ExecutionModelFragment, 1, 6, 0, 6, 6), // ps/samp6
MakeResourceBinding(spv::ExecutionModelFragment, 1, 7, 0, 7, 7), // ps/samp7
MakeResourceBinding(spv::ExecutionModelFragment, 1, 8, 0, 8, 8), // ps/samp8
// Dynamic list initialized below Fragment, 1, N, 0, N, N // ps/samp0-N
MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo
MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, 0, 0, 0, 0), // cs/output_image
// Dynamic list initialized below GLCompute, 1, N, 0, N, N, // cs/samp0-N
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo
// Dynamic list initialized below GLCompute, 3, N, 0, MAX_COMPUTE_SHADER_SAMPLERS + N, 0, // cs/img0-N
};

spirv_cross::CompilerMSL::Options options;
Expand Down Expand Up @@ -503,6 +497,29 @@ fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) {

for (auto& binding : resource_bindings)
compiler.add_msl_resource_binding(binding);
if (stage == ShaderStage::Pixel)
{
for (u32 i = 0; i < VideoCommon::MAX_PIXEL_SHADER_SAMPLERS; i++) // ps/samp0-N
{
compiler.add_msl_resource_binding(
MakeResourceBinding(spv::ExecutionModelFragment, 1, i, 0, i, i));
}
}
else if (stage == ShaderStage::Compute)
{
u32 img = 0;
u32 smp = 0;
for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/samp0-N
{
compiler.add_msl_resource_binding(
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, i, 0, img++, smp++));
}
for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/img0-N
{
compiler.add_msl_resource_binding(
MakeResourceBinding(spv::ExecutionModelGLCompute, 3, i, 0, img++, 0));
}
}

std::string output(MSL_HEADER);
std::string compiled = compiler.compile();
Expand Down

0 comments on commit d67f54b

Please sign in to comment.