Skip to content
Permalink
Browse files
Merge pull request #11028 from tellowkrinkle/MetalFixes
Various Metal renderer improvements
  • Loading branch information
JMC47 committed Oct 24, 2022
2 parents 4787b25 + fd2680d commit b667931
Show file tree
Hide file tree
Showing 16 changed files with 284 additions and 29 deletions.
@@ -87,6 +87,11 @@ const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{
{System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false};

const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
{System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};
const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE{{System::GFX, "Settings", "MTLUsePresentDrawable"},
false};

const Info<bool> GFX_SW_DUMP_OBJECTS{{System::GFX, "Settings", "SWDumpObjects"}, false};
const Info<bool> GFX_SW_DUMP_TEV_STAGES{{System::GFX, "Settings", "SWDumpTevStages"}, false};
const Info<bool> GFX_SW_DUMP_TEV_TEX_FETCHES{{System::GFX, "Settings", "SWDumpTevTexFetches"},
@@ -11,6 +11,7 @@ enum class AspectMode : int;
enum class ShaderCompilationMode : int;
enum class StereoMode : int;
enum class FreelookControlType : int;
enum class TriState : int;

namespace Config
{
@@ -75,6 +76,9 @@ extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION;

extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE;

extern const Info<bool> GFX_SW_DUMP_OBJECTS;
extern const Info<bool> GFX_SW_DUMP_TEV_STAGES;
extern const Info<bool> GFX_SW_DUMP_TEV_TEX_FETCHES;
@@ -39,3 +39,5 @@ PRIVATE
${METAL_LIBRARY}
${QUARTZCORE_LIBRARY}
)

target_compile_options(videometal PRIVATE -fno-objc-arc)
@@ -36,6 +36,7 @@
{
g_state_tracker->EndRenderPass();
g_state_tracker->FlushEncoders();
g_state_tracker->NotifyOfCPUGPUSync();
g_state_tracker->WaitForFlushedEncoders();
return std::vector<BBoxType>(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length);
}
@@ -56,6 +56,7 @@

// There's a possibility that some active performance queries are unflushed
g_state_tracker->FlushEncoders();
g_state_tracker->NotifyOfCPUGPUSync();

std::unique_lock<std::mutex> lock(m_results_mtx);
while (!IsFlushed())
@@ -20,6 +20,7 @@
m_layer(std::move(layer))
{
UpdateActiveConfig();
[m_layer setDisplaySyncEnabled:g_ActiveConfig.bVSyncActive];
}

Metal::Renderer::~Renderer() = default;
@@ -454,8 +455,15 @@
g_state_tracker->EndRenderPass();
if (m_drawable)
{
[g_state_tracker->GetRenderCmdBuf()
addScheduledHandler:[drawable = std::move(m_drawable)](id) { [drawable present]; }];
// PresentDrawable refuses to allow Dolphin to present faster than the display's refresh rate
// when windowed (or fullscreen with vsync enabled, but that's more understandable).
// On the other hand, it helps Xcode's GPU captures start and stop on frame boundaries
// which is convenient. Put it here as a default-off config, which we can override in Xcode.
if (g_ActiveConfig.bUsePresentDrawable)
[g_state_tracker->GetRenderCmdBuf() presentDrawable:m_drawable];
else
[g_state_tracker->GetRenderCmdBuf()
addScheduledHandler:[drawable = std::move(m_drawable)](id) { [drawable present]; }];
m_bb_texture->SetMTLTexture(nullptr);
m_drawable = nullptr;
}
@@ -34,7 +34,6 @@ class StateTracker
Uniform,
Vertex,
Index,
TextureData,
Texels,
Last = Texels
};
@@ -75,6 +74,14 @@ class StateTracker
return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire);
}
void ReloadSamplers();
void NotifyOfCPUGPUSync()
{
if (!g_features.manual_buffer_upload || !m_manual_buffer_upload)
return;
if (m_upload_cmdbuf || m_current_render_cmdbuf)
return;
SetManualBufferUpload(false);
}

void SetPipeline(const Pipeline* pipe);
void SetPipeline(const ComputePipeline* pipe);
@@ -106,6 +113,7 @@ class StateTracker
{
return (amt + static_cast<size_t>(align)) & ~static_cast<size_t>(align);
}
Map AllocateForTextureUpload(size_t amt);
Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align)
{
Preallocate(buffer_idx, amt);
@@ -119,6 +127,7 @@ class StateTracker
static_cast<size_t>(align)) == 0);
return CommitPreallocation(buffer_idx, Align(amt, align));
}
id<MTLBlitCommandEncoder> GetUploadEncoder();
id<MTLBlitCommandEncoder> GetTextureUploadEncoder();
id<MTLCommandBuffer> GetRenderCmdBuf();

@@ -142,18 +151,28 @@ class StateTracker
void Reset(size_t new_size);
};

struct Buffer
struct CPUBuffer
{
UsageTracker usage;
MRCOwned<id<MTLBuffer>> mtlbuffer;
void* buffer = nullptr;
};

struct BufferPair
{
UsageTracker usage;
MRCOwned<id<MTLBuffer>> cpubuffer;
MRCOwned<id<MTLBuffer>> gpubuffer;
void* buffer = nullptr;
size_t last_upload = 0;
};

struct Backref;
struct PerfQueryTracker;

std::shared_ptr<Backref> m_backref;
std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
MRCOwned<id<MTLFence>> m_fence;
MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf;
@@ -165,7 +184,8 @@ class StateTracker
MRCOwned<MTLRenderPassDescriptor*> m_render_pass_desc[3];
MRCOwned<MTLRenderPassDescriptor*> m_resolve_pass_desc;
Framebuffer* m_current_framebuffer;
Buffer m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
CPUBuffer m_texture_upload_buffer;
BufferPair m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
u64 m_current_draw = 1;
std::atomic<u64> m_last_finished_draw{0};

@@ -250,9 +270,12 @@ class StateTracker
} m_state;

u32 m_perf_query_tracker_counter = 0;
bool m_manual_buffer_upload = false;

void SetManualBufferUpload(bool enable);
std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
void SetSamplerForce(u32 idx, const SamplerState& sampler);
void Sync(BufferPair& buffer);
Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
void CheckViewport();
void CheckScissor();
@@ -45,12 +45,11 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
// clang-format off
switch (buffer)
{
case Metal::StateTracker::UploadBuffer::TextureData: return @"Texture Data";
case Metal::StateTracker::UploadBuffer::Texels: return @"Texels";
case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices";
case Metal::StateTracker::UploadBuffer::Index: return @"Indices";
case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms";
case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload";
case Metal::StateTracker::UploadBuffer::Texels: return @"Texels";
case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices";
case Metal::StateTracker::UploadBuffer::Index: return @"Indices";
case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms";
case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload";
}
// clang-format on
}
@@ -105,6 +104,7 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
Metal::StateTracker::StateTracker() : m_backref(std::make_shared<Backref>(this))
{
m_flags.should_apply_label = true;
m_fence = MRCTransfer([g_device newFence]);
for (MRCOwned<MTLRenderPassDescriptor*>& rpdesc : m_render_pass_desc)
{
rpdesc = MRCTransfer([MTLRenderPassDescriptor new]);
@@ -141,9 +141,10 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}

// MARK: BufferPair Ops

std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt)
{
Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
amt = (amt + 15) & ~15ull;
CPUBuffer& buffer = m_texture_upload_buffer;
u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
if (__builtin_expect(needs_new, false))
@@ -155,29 +156,108 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
MTLResourceOptions options =
MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
[buffer.mtlbuffer setLabel:GetName(buffer_idx)];
[buffer.mtlbuffer setLabel:@"Texture Upload Buffer"];
ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)");
buffer.buffer = [buffer.mtlbuffer contents];
buffer.usage.Reset(newsize);
}

size_t pos = buffer.usage.Allocate(m_current_draw, amt);

Map ret = {buffer.mtlbuffer, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
DEBUG_ASSERT(pos <= buffer.usage.Size() &&
"Previous code should have guaranteed there was enough space");
return ret;
}

std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
{
BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
size_t base_pos = buffer.usage.Pos();
bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
bool needs_upload = needs_new || buffer.usage.Pos() == 0;
if (m_manual_buffer_upload && needs_upload)
{
if (base_pos != buffer.last_upload)
{
id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
[encoder copyFromBuffer:buffer.cpubuffer
sourceOffset:buffer.last_upload
toBuffer:buffer.gpubuffer
destinationOffset:buffer.last_upload
size:base_pos - buffer.last_upload];
}
buffer.last_upload = 0;
}
if (__builtin_expect(needs_new, false))
{
// Orphan buffer
size_t newsize = std::max<size_t>(buffer.usage.Size() * 2, 4096);
while (newsize < amt)
newsize *= 2;
MTLResourceOptions options =
MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
[buffer.cpubuffer setLabel:GetName(buffer_idx)];
ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
buffer.buffer = [buffer.cpubuffer contents];
buffer.usage.Reset(newsize);
if (g_features.manual_buffer_upload)
{
options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked;
buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
[buffer.gpubuffer setLabel:GetName(buffer_idx)];
ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
}
}
size_t pos = buffer.usage.Pos();
return std::make_pair(reinterpret_cast<char*>(buffer.buffer) + pos, pos);
}

Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx,
size_t amt)
{
Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
size_t pos = buffer.usage.Allocate(m_current_draw, amt);
Map ret = {nil, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
ret.gpu_buffer = buffer.mtlbuffer;
ret.gpu_buffer = m_manual_buffer_upload ? buffer.gpubuffer : buffer.cpubuffer;
DEBUG_ASSERT(pos <= buffer.usage.Size() &&
"Previous code should have guaranteed there was enough space");
return ret;
}

void Metal::StateTracker::Sync(BufferPair& buffer)
{
if (!m_manual_buffer_upload || buffer.usage.Pos() == buffer.last_upload)
return;

id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
[encoder copyFromBuffer:buffer.cpubuffer
sourceOffset:buffer.last_upload
toBuffer:buffer.gpubuffer
destinationOffset:buffer.last_upload
size:buffer.usage.Pos() - buffer.last_upload];
buffer.last_upload = buffer.usage.Pos();
}

// MARK: Render Pass / Encoder Management

id<MTLBlitCommandEncoder> Metal::StateTracker::GetUploadEncoder()
{
if (!m_upload_cmdbuf)
{
@autoreleasepool
{
m_upload_cmdbuf = MRCRetain([g_queue commandBuffer]);
[m_upload_cmdbuf setLabel:@"Vertex Upload"];
m_upload_encoder = MRCRetain([m_upload_cmdbuf blitCommandEncoder]);
[m_upload_encoder setLabel:@"Vertex Upload"];
}
}
return m_upload_encoder;
}

id<MTLBlitCommandEncoder> Metal::StateTracker::GetTextureUploadEncoder()
{
if (!m_texture_upload_cmdbuf)
@@ -270,6 +350,8 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
if (m_current_perf_query)
[descriptor setVisibilityResultBuffer:nil];
if (m_manual_buffer_upload)
[m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex];
AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
if (!attachment)
attachment = m_current_framebuffer->GetDepthAttachment();
@@ -299,6 +381,8 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
EndRenderPass();
m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]);
[m_current_compute_encoder setLabel:@"Compute"];
if (m_manual_buffer_upload)
[m_current_compute_encoder waitForFence:m_fence];
m_flags.NewEncoder();
m_dirty_samplers = 0xff;
m_dirty_textures = 0xff;
@@ -326,6 +410,20 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
if (!m_current_render_cmdbuf)
return;
EndRenderPass();
for (int i = 0; i <= static_cast<int>(UploadBuffer::Last); ++i)
Sync(m_upload_buffers[i]);
if (!m_manual_buffer_upload)
{
ASSERT(!m_upload_cmdbuf && "Should never be used!");
}
else if (m_upload_cmdbuf)
{
[m_upload_encoder updateFence:m_fence];
[m_upload_encoder endEncoding];
[m_upload_cmdbuf commit];
m_upload_encoder = nullptr;
m_upload_cmdbuf = nullptr;
}
if (m_texture_upload_cmdbuf)
{
[m_texture_upload_encoder endEncoding];
@@ -355,6 +453,8 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
m_last_render_cmdbuf = std::move(m_current_render_cmdbuf);
m_current_render_cmdbuf = nullptr;
m_current_draw++;
if (g_features.manual_buffer_upload && !m_manual_buffer_upload)
SetManualBufferUpload(true);
}

void Metal::StateTracker::WaitForFlushedEncoders()
@@ -368,6 +468,23 @@ explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
m_state.samplers[i] = g_object_cache->GetSampler(m_state.sampler_states[i]);
}

void Metal::StateTracker::SetManualBufferUpload(bool enabled)
{
// When a game does something that needs CPU-GPU sync (e.g. bbox, texture download, etc),
// the next command buffer will be done with manual buffer upload disabled,
// since overlapping the upload with the previous draw won't be possible (due to sync).
// This greatly improves performance in heavy bbox games like Super Paper Mario.
m_manual_buffer_upload = enabled;
if (enabled)
{
for (BufferPair& buffer : m_upload_buffers)
{
// Update sync positions, since Sync doesn't do it when manual buffer upload is off
buffer.last_upload = buffer.usage.Pos();
}
}
}

// MARK: State Setters

void Metal::StateTracker::SetPipeline(const Pipeline* pipe)

0 comments on commit b667931

Please sign in to comment.