166 changes: 166 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLMain.mm
@@ -0,0 +1,166 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Metal/VideoBackend.h"

#include <AppKit/AppKit.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>

#include "Common/Common.h"
#include "Common/MsgHandler.h"

#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLPerfQuery.h"
#include "VideoBackends/Metal/MTLRenderer.h"
#include "VideoBackends/Metal/MTLStateTracker.h"
#include "VideoBackends/Metal/MTLUtil.h"
#include "VideoBackends/Metal/MTLVertexManager.h"

#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"

std::string Metal::VideoBackend::GetName() const
{
return NAME;
}

std::string Metal::VideoBackend::GetDisplayName() const
{
// i18n: Apple's Metal graphics API (https://developer.apple.com/metal/)
return _trans("Metal");
}

std::optional<std::string> Metal::VideoBackend::GetWarningMessage() const
{
if (Util::GetAdapterList().empty())
{
return _trans("No Metal-compatible GPUs were found. "
"Use the OpenGL backend or upgrade your computer/GPU");
}

return std::nullopt;
}

static bool WindowSystemTypeSupportsMetal(WindowSystemType type)
{
switch (type)
{
case WindowSystemType::MacOS:
return true;
default:
return false;
}
}

bool Metal::VideoBackend::Initialize(const WindowSystemInfo& wsi)
{
@autoreleasepool
{
if (!WindowSystemTypeSupportsMetal(wsi.type) || !wsi.render_surface)
{
PanicAlertFmt("Bad WindowSystemInfo for Metal renderer.");
return false;
}

auto devs = Util::GetAdapterList();
if (devs.empty())
{
PanicAlertFmt("No Metal GPUs detected.");
return false;
}

Util::PopulateBackendInfo(&g_Config);
Util::PopulateBackendInfoAdapters(&g_Config, devs);

// Since we haven't called InitializeShared yet, iAdapter may be out of range,
// so we have to check it ourselves.
size_t selected_adapter_index = static_cast<size_t>(g_Config.iAdapter);
if (selected_adapter_index >= devs.size())
{
WARN_LOG_FMT(VIDEO, "Metal adapter index out of range, selecting default adapter.");
selected_adapter_index = 0;
}
MRCOwned<id<MTLDevice>> adapter = std::move(devs[selected_adapter_index]);
Util::PopulateBackendInfoFeatures(&g_Config, adapter);

// With the backend information populated, we can now initialize videocommon.
InitializeShared();

MRCOwned<CAMetalLayer*> layer = MRCRetain(static_cast<CAMetalLayer*>(wsi.render_surface));
[layer setDevice:adapter];
if (Util::ToAbstract([layer pixelFormat]) == AbstractTextureFormat::Undefined)
[layer setPixelFormat:MTLPixelFormatBGRA8Unorm];
CGSize size = [layer bounds].size;
float scale = [layer contentsScale];

ObjectCache::Initialize(std::move(adapter));
g_state_tracker = std::make_unique<StateTracker>();
g_renderer = std::make_unique<Renderer>(std::move(layer), size.width * scale,
size.height * scale, scale);
g_vertex_manager = std::make_unique<VertexManager>();
g_perf_query = std::make_unique<PerfQuery>();
g_framebuffer_manager = std::make_unique<FramebufferManager>();
g_texture_cache = std::make_unique<TextureCacheBase>();
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();

if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() ||
!g_renderer->Initialize() || !g_framebuffer_manager->Initialize() ||
!g_texture_cache->Initialize())
{
PanicAlertFmt("Failed to initialize renderer classes");
Shutdown();
return false;
}

g_shader_cache->InitializeShaderCache();

return true;
}
}

void Metal::VideoBackend::Shutdown()
{
g_shader_cache->Shutdown();
g_renderer->Shutdown();

g_shader_cache.reset();
g_texture_cache.reset();
g_framebuffer_manager.reset();
g_perf_query.reset();
g_vertex_manager.reset();
g_renderer.reset();
g_state_tracker.reset();
ObjectCache::Shutdown();
ShutdownShared();
}

void Metal::VideoBackend::InitBackendInfo()
{
@autoreleasepool
{
Util::PopulateBackendInfo(&g_Config);
auto adapters = Util::GetAdapterList();
Util::PopulateBackendInfoAdapters(&g_Config, adapters);
if (!adapters.empty())
{
// Use the selected adapter, or the first to fill features.
size_t index = static_cast<size_t>(g_Config.iAdapter);
if (index >= adapters.size())
index = 0;
Util::PopulateBackendInfoFeatures(&g_Config, adapters[index]);
}
}
}

void Metal::VideoBackend::PrepareWindow(WindowSystemInfo& wsi)
{
if (wsi.type != WindowSystemType::MacOS)
return;
NSView* view = static_cast<NSView*>(wsi.render_surface);
CAMetalLayer* layer = [CAMetalLayer layer];
[view setWantsLayer:YES];
[view setLayer:layer];
wsi.render_surface = layer;
}
106 changes: 106 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLObjectCache.h
@@ -0,0 +1,106 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Metal/Metal.h>
#include <memory>

#include "VideoBackends/Metal/MRCHelpers.h"

#include "VideoCommon/RenderState.h"

struct AbstractPipelineConfig;
class AbstractPipeline;

namespace Metal
{
class Shader;
extern MRCOwned<id<MTLDevice>> g_device;
extern MRCOwned<id<MTLCommandQueue>> g_queue;

struct DepthStencilSelector
{
u8 value;

DepthStencilSelector() : value(0) {}
DepthStencilSelector(bool update_enable, enum CompareMode cmp)
: value(update_enable | (static_cast<u32>(cmp) << 1))
{
}
DepthStencilSelector(DepthState state)
: DepthStencilSelector(state.testenable ? state.updateenable : false,
state.testenable ? state.func : CompareMode::Always)
{
}

bool UpdateEnable() const { return value & 1; }
enum CompareMode CompareMode() const { return static_cast<enum CompareMode>(value >> 1); }

bool operator==(const DepthStencilSelector& other) { return value == other.value; }
bool operator!=(const DepthStencilSelector& other) { return !(*this == other); }
static constexpr size_t N_VALUES = 1 << 4;
};

struct SamplerSelector
{
u8 value;
SamplerSelector() : value(0) {}
SamplerSelector(SamplerState state)
{
value = (static_cast<u32>(state.tm0.min_filter.Value()) << 0) |
(static_cast<u32>(state.tm0.mag_filter.Value()) << 1) |
(static_cast<u32>(state.tm0.mipmap_filter.Value()) << 2) |
(static_cast<u32>(state.tm0.anisotropic_filtering) << 3);
value |= (static_cast<u32>(state.tm0.wrap_u.Value()) +
3 * static_cast<u32>(state.tm0.wrap_v.Value()))
<< 4;
}
FilterMode MinFilter() const { return static_cast<FilterMode>(value & 1); }
FilterMode MagFilter() const { return static_cast<FilterMode>((value >> 1) & 1); }
FilterMode MipFilter() const { return static_cast<FilterMode>((value >> 2) & 1); }
WrapMode WrapU() const { return static_cast<WrapMode>((value >> 4) % 3); }
WrapMode WrapV() const { return static_cast<WrapMode>((value >> 4) / 3); }
bool AnisotropicFiltering() const { return ((value >> 3) & 1); }

bool operator==(const SamplerSelector& other) { return value == other.value; }
bool operator!=(const SamplerSelector& other) { return !(*this == other); }
static constexpr size_t N_VALUES = (1 << 4) * 9;
};

class ObjectCache
{
ObjectCache();

public:
~ObjectCache();

static void Initialize(MRCOwned<id<MTLDevice>> device);
static void Shutdown();

id<MTLDepthStencilState> GetDepthStencil(DepthStencilSelector sel) { return m_dss[sel.value]; }

id<MTLSamplerState> GetSampler(SamplerSelector sel)
{
if (__builtin_expect(!m_samplers[sel.value], false))
m_samplers[sel.value] = CreateSampler(sel);
return m_samplers[sel.value];
}

id<MTLSamplerState> GetSampler(SamplerState state) { return GetSampler(SamplerSelector(state)); }

void ReloadSamplers();

std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config);
void ShaderDestroyed(const Shader* shader);

private:
class Internal;
std::unique_ptr<Internal> m_internal;
MRCOwned<id<MTLSamplerState>> CreateSampler(SamplerSelector sel);
MRCOwned<id<MTLDepthStencilState>> m_dss[DepthStencilSelector::N_VALUES];
MRCOwned<id<MTLSamplerState>> m_samplers[SamplerSelector::N_VALUES];
};

extern std::unique_ptr<ObjectCache> g_object_cache;
} // namespace Metal
500 changes: 500 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLObjectCache.mm

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLPerfQuery.h
@@ -0,0 +1,34 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <condition_variable>
#include <mutex>

#include "VideoCommon/PerfQueryBase.h"

namespace Metal
{
class PerfQuery final : public PerfQueryBase
{
public:
void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;

/// Notify PerfQuery of a new pending encoder
/// One call to ReturnResults should be made for every call to IncCount
void IncCount() { m_query_count.fetch_add(1, std::memory_order_relaxed); }
/// May be called from any thread
void ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count, u32 query_id);

private:
u32 m_current_query = 0;
std::mutex m_results_mtx;
std::condition_variable m_cv;
};
} // namespace Metal
90 changes: 90 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLPerfQuery.mm
@@ -0,0 +1,90 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Metal/MTLPerfQuery.h"

#include "VideoBackends/Metal/MTLStateTracker.h"

void Metal::PerfQuery::EnableQuery(PerfQueryGroup type)
{
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
g_state_tracker->EnablePerfQuery(type, m_current_query);
}

void Metal::PerfQuery::DisableQuery(PerfQueryGroup type)
{
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
g_state_tracker->DisablePerfQuery();
}

void Metal::PerfQuery::ResetQuery()
{
std::lock_guard<std::mutex> lock(m_results_mtx);
m_current_query++;
for (std::atomic<u32>& result : m_results)
result.store(0, std::memory_order_relaxed);
}

u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)
{
u32 result = 0;
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
{
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
}
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
{
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
}
else if (type == PQ_BLEND_INPUT)
{
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
}
else if (type == PQ_EFB_COPY_CLOCKS)
{
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
}

return result;
}

void Metal::PerfQuery::FlushResults()
{
if (IsFlushed())
return;

// There's a possibility that some active performance queries are unflushed
g_state_tracker->FlushEncoders();

std::unique_lock<std::mutex> lock(m_results_mtx);
while (!IsFlushed())
m_cv.wait(lock);
}

bool Metal::PerfQuery::IsFlushed() const
{
return m_query_count.load(std::memory_order_acquire) == 0;
}

void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,
u32 query_id)
{
{
std::lock_guard<std::mutex> lock(m_results_mtx);
if (m_current_query == query_id)
{
for (size_t i = 0; i < count; ++i)
{
u64 native_res_result = data[i] * (EFB_WIDTH * EFB_HEIGHT) /
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());

native_res_result /= g_ActiveConfig.iMultisamples;

m_results[groups[i]].fetch_add(native_res_result, std::memory_order_relaxed);
}
}
m_query_count.fetch_sub(1, std::memory_order_release);
}
m_cv.notify_one();
}
71 changes: 71 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLPipeline.h
@@ -0,0 +1,71 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Metal/Metal.h>

#include "VideoBackends/Metal/MRCHelpers.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLShader.h"

#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"

namespace Metal
{
struct PipelineReflection
{
u32 textures = 0;
u32 samplers = 0;
u32 vertex_buffers = 0;
u32 fragment_buffers = 0;
PipelineReflection() = default;
explicit PipelineReflection(MTLRenderPipelineReflection* reflection);
};

class Pipeline final : public AbstractPipeline
{
public:
explicit Pipeline(MRCOwned<id<MTLRenderPipelineState>> pipeline,
const PipelineReflection& reflection, MTLPrimitiveType prim, MTLCullMode cull,
DepthState depth, AbstractPipelineUsage usage);

id<MTLRenderPipelineState> Get() const { return m_pipeline; }
MTLPrimitiveType Prim() const { return m_prim; }
MTLCullMode Cull() const { return m_cull; }
DepthStencilSelector DepthStencil() const { return m_depth_stencil; }
AbstractPipelineUsage Usage() const { return m_usage; }
u32 GetTextures() const { return m_reflection.textures; }
u32 GetSamplers() const { return m_reflection.samplers; }
u32 GetVertexBuffers() const { return m_reflection.vertex_buffers; }
u32 GetFragmentBuffers() const { return m_reflection.fragment_buffers; }
bool UsesVertexBuffer(u32 index) const { return m_reflection.vertex_buffers & (1 << index); }
bool UsesFragmentBuffer(u32 index) const { return m_reflection.fragment_buffers & (1 << index); }

private:
MRCOwned<id<MTLRenderPipelineState>> m_pipeline;
MTLPrimitiveType m_prim;
MTLCullMode m_cull;
DepthStencilSelector m_depth_stencil;
AbstractPipelineUsage m_usage;
PipelineReflection m_reflection;
};

class ComputePipeline : public Shader
{
public:
explicit ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection,
std::string msl, MRCOwned<id<MTLFunction>> shader,
MRCOwned<id<MTLComputePipelineState>> pipeline);

id<MTLComputePipelineState> GetComputePipeline() const { return m_compute_pipeline; }
bool UsesTexture(u32 index) const { return m_textures & (1 << index); }
bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); }

private:
MRCOwned<id<MTLComputePipelineState>> m_compute_pipeline;
u32 m_textures = 0;
u32 m_buffers = 0;
};
} // namespace Metal
70 changes: 70 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLPipeline.mm
@@ -0,0 +1,70 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Metal/MTLPipeline.h"

#include "Common/MsgHandler.h"

static void MarkAsUsed(u32* list, u32 start, u32 length)
{
for (u32 i = start; i < start + length; ++i)
*list |= 1 << i;
}

static void GetArguments(NSArray<MTLArgument*>* arguments, u32* textures, u32* samplers,
u32* buffers)
{
for (MTLArgument* argument in arguments)
{
const u32 idx = [argument index];
const u32 length = [argument arrayLength];
if (idx + length > 32)
{
PanicAlertFmt("Making a MTLPipeline with high argument index {:d}..<{:d} for {:s}", //
idx, idx + length, [[argument name] UTF8String]);
continue;
}
switch ([argument type])
{
case MTLArgumentTypeTexture:
if (textures)
MarkAsUsed(textures, idx, length);
else
PanicAlertFmt("Vertex function wants a texture!");
break;
case MTLArgumentTypeSampler:
if (samplers)
MarkAsUsed(samplers, idx, length);
else
PanicAlertFmt("Vertex function wants a sampler!");
break;
case MTLArgumentTypeBuffer:
MarkAsUsed(buffers, idx, length);
break;
default:
break;
}
}
}

Metal::PipelineReflection::PipelineReflection(MTLRenderPipelineReflection* reflection)
{
GetArguments([reflection vertexArguments], nullptr, nullptr, &vertex_buffers);
GetArguments([reflection fragmentArguments], &textures, &samplers, &fragment_buffers);
}

Metal::Pipeline::Pipeline(MRCOwned<id<MTLRenderPipelineState>> pipeline,
const PipelineReflection& reflection, MTLPrimitiveType prim,
MTLCullMode cull, DepthState depth, AbstractPipelineUsage usage)
: m_pipeline(std::move(pipeline)), m_prim(prim), m_cull(cull), m_depth_stencil(depth),
m_usage(usage), m_reflection(reflection)
{
}

Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection,
std::string msl, MRCOwned<id<MTLFunction>> shader,
MRCOwned<id<MTLComputePipelineState>> pipeline)
: Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline))
{
GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers);
}
90 changes: 90 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLRenderer.h
@@ -0,0 +1,90 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>

#include "VideoCommon/RenderBase.h"

#include "VideoBackends/Metal/MRCHelpers.h"

namespace Metal
{
class Framebuffer;
class Texture;

class Renderer final : public ::Renderer
{
public:
Renderer(MRCOwned<CAMetalLayer*> layer, int width, int height, float layer_scale);
~Renderer() override;

bool IsHeadless() const override;

bool Initialize() override;

std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config,
std::string_view name) override;
std::unique_ptr<AbstractStagingTexture>
CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override;
std::unique_ptr<AbstractFramebuffer>
CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override;

std::unique_ptr<AbstractShader> CreateShaderFromSource(ShaderStage stage, std::string_view source,
std::string_view name) override;
std::unique_ptr<AbstractShader> CreateShaderFromBinary(ShaderStage stage, const void* data,
size_t length,
std::string_view name) override;
std::unique_ptr<AbstractShader> CreateShaderFromMSL(ShaderStage stage, std::string msl,
std::string_view glsl, std::string_view name);
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data = nullptr,
size_t cache_data_length = 0) override;

void Flush() override;
void WaitForGPUIdle() override;
void OnConfigChanged(u32 bits) override;

void ClearScreen(const MathUtil::Rectangle<int>& rc, bool color_enable, bool alpha_enable,
bool z_enable, u32 color, u32 z) override;

void SetPipeline(const AbstractPipeline* pipeline) override;
void SetFramebuffer(AbstractFramebuffer* framebuffer) override;
void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override;
void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {},
float depth_value = 0.0f) override;
void SetScissorRect(const MathUtil::Rectangle<int>& rc) override;
void SetTexture(u32 index, const AbstractTexture* texture) override;
void SetSamplerState(u32 index, const SamplerState& state) override;
void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override;
void UnbindTexture(const AbstractTexture* texture) override;
void SetViewport(float x, float y, float width, float height, float near_depth,
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;

protected:
std::unique_ptr<::BoundingBox> CreateBoundingBox() const override;

private:
MRCOwned<CAMetalLayer*> m_layer;
MRCOwned<id<CAMetalDrawable>> m_drawable;
std::unique_ptr<Texture> m_bb_texture;
std::unique_ptr<Framebuffer> m_backbuffer;
u32 m_texture_counter = 0;
u32 m_staging_texture_counter = 0;
std::array<u32, 4> m_shader_counter = {};

void CheckForSurfaceChange();
void CheckForSurfaceResize();
void SetupSurface();
};
} // namespace Metal
502 changes: 502 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLRenderer.mm

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLShader.h
@@ -0,0 +1,28 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Metal/Metal.h>

#include "VideoBackends/Metal/MRCHelpers.h"

#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"

namespace Metal
{
class Shader : public AbstractShader
{
public:
explicit Shader(ShaderStage stage, std::string msl, MRCOwned<id<MTLFunction>> shader);
~Shader();

id<MTLFunction> GetShader() const { return m_shader; }
BinaryData GetBinary() const override;

private:
std::string m_msl;
MRCOwned<id<MTLFunction>> m_shader;
};
} // namespace Metal
19 changes: 19 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLShader.mm
@@ -0,0 +1,19 @@

#include "VideoBackends/Metal/MTLShader.h"

#include "VideoBackends/Metal/MTLObjectCache.h"

Metal::Shader::Shader(ShaderStage stage, std::string msl, MRCOwned<id<MTLFunction>> shader)
: AbstractShader(stage), m_msl(std::move(msl)), m_shader(std::move(shader))
{
}

Metal::Shader::~Shader()
{
g_object_cache->ShaderDestroyed(this);
}

AbstractShader::BinaryData Metal::Shader::GetBinary() const
{
return BinaryData(m_msl.begin(), m_msl.end());
}
263 changes: 263 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLStateTracker.h
@@ -0,0 +1,263 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Metal/Metal.h>
#include <atomic>
#include <memory>
#include <vector>

#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"

#include "VideoBackends/Metal/MRCHelpers.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLTexture.h"
#include "VideoBackends/Metal/MTLUtil.h"

#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/RenderBase.h"

namespace Metal
{
class Pipeline;
class ComputePipeline;

class StateTracker
{
public:
enum class UploadBuffer
{
Other,
Uniform,
Vertex,
Index,
TextureData,
Texels,
Last = Texels
};

struct Map
{
id<MTLBuffer> gpu_buffer;
size_t gpu_offset;
void* cpu_buffer;
};

enum class AlignMask : size_t
{
None = 0,
Other = 15,
Uniform = 255,
};

StateTracker(StateTracker&&) = delete;
explicit StateTracker();
~StateTracker();

Framebuffer* GetCurrentFramebuffer() { return m_current_framebuffer; };
void SetCurrentFramebuffer(Framebuffer* framebuffer);
void BeginClearRenderPass(MTLClearColor color, float depth);
void BeginRenderPass(MTLLoadAction load_action);
void BeginRenderPass(MTLRenderPassDescriptor* descriptor);
void BeginComputePass();
MTLRenderPassDescriptor* GetRenderPassDescriptor(Framebuffer* framebuffer,
MTLLoadAction load_action);

void EndRenderPass();
void FlushEncoders();
void WaitForFlushedEncoders();
bool HasUnflushedData() { return static_cast<bool>(m_current_render_cmdbuf); }
bool GPUBusy()
{
return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire);
}
void ReloadSamplers();

void SetPipeline(const Pipeline* pipe);
void SetPipeline(const ComputePipeline* pipe);
void SetScissor(const MathUtil::Rectangle<int>& rect);
void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth);
void SetTexture(u32 idx, id<MTLTexture> texture);
void SetSampler(u32 idx, const SamplerState& sampler);
void SetComputeTexture(const Texture* texture);
void InvalidateUniforms(bool vertex, bool fragment);
void SetUtilityUniform(const void* buffer, size_t size);
void SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1);
void SetVerticesAndIndices(id<MTLBuffer> vertices, id<MTLBuffer> indices);
void SetBBoxBuffer(id<MTLBuffer> bbox, id<MTLFence> upload, id<MTLFence> download);
void SetVertexBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
void SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
/// Use around utility draws that are commonly used immediately before gx draws to the same buffer
void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; }
void EnablePerfQuery(PerfQueryGroup group, u32 query_id);
void DisablePerfQuery();
void UnbindTexture(id<MTLTexture> texture);

void Draw(u32 base_vertex, u32 num_vertices);
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex);
void DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, u32 groups_x,
u32 groups_y, u32 groups_z);
void ResolveTexture(id<MTLTexture> src, id<MTLTexture> dst, u32 layer, u32 level);

size_t Align(size_t amt, AlignMask align)
{
return (amt + static_cast<size_t>(align)) & ~static_cast<size_t>(align);
}
Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align)
{
Preallocate(buffer_idx, amt);
return CommitPreallocation(buffer_idx, amt, align);
}
std::pair<void*, size_t> Preallocate(UploadBuffer buffer_idx, size_t amt);
/// Must follow a call to Preallocate where amt is >= to the one provided here
Map CommitPreallocation(UploadBuffer buffer_idx, size_t amt, AlignMask align)
{
DEBUG_ASSERT((m_upload_buffers[static_cast<int>(buffer_idx)].usage.Pos() &
static_cast<size_t>(align)) == 0);
return CommitPreallocation(buffer_idx, Align(amt, align));
}
id<MTLBlitCommandEncoder> GetTextureUploadEncoder();
id<MTLCommandBuffer> GetRenderCmdBuf();

private:
class UsageTracker
{
struct UsageEntry
{
u64 drawno;
size_t pos;
};
std::vector<UsageEntry> m_usage;
size_t m_size = 0;
size_t m_pos = 0;

public:
size_t Size() { return m_size; }
size_t Pos() { return m_pos; }
bool PrepareForAllocation(u64 last_draw, size_t amt);
size_t Allocate(u64 current_draw, size_t amt);
void Reset(size_t new_size);
};

struct Buffer
{
UsageTracker usage;
MRCOwned<id<MTLBuffer>> mtlbuffer;
void* buffer = nullptr;
};

struct Backref;
struct PerfQueryTracker;

std::shared_ptr<Backref> m_backref;
std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf;
MRCOwned<id<MTLBlitCommandEncoder>> m_texture_upload_encoder;
MRCOwned<id<MTLCommandBuffer>> m_current_render_cmdbuf;
MRCOwned<id<MTLCommandBuffer>> m_last_render_cmdbuf;
MRCOwned<id<MTLRenderCommandEncoder>> m_current_render_encoder;
MRCOwned<id<MTLComputeCommandEncoder>> m_current_compute_encoder;
MRCOwned<MTLRenderPassDescriptor*> m_render_pass_desc[3];
MRCOwned<MTLRenderPassDescriptor*> m_resolve_pass_desc;
Framebuffer* m_current_framebuffer;
Buffer m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
u64 m_current_draw = 1;
std::atomic<u64> m_last_finished_draw{0};

MRCOwned<id<MTLTexture>> m_dummy_texture;

// MARK: State
u8 m_dirty_textures;
u8 m_dirty_samplers;
union Flags
{
struct
{
// clang-format off
bool has_gx_vs_uniform : 1;
bool has_gx_ps_uniform : 1;
bool has_utility_vs_uniform : 1;
bool has_utility_ps_uniform : 1;
bool has_compute_texture : 1;
bool has_pipeline : 1;
bool has_scissor : 1;
bool has_viewport : 1;
bool has_vertices : 1;
bool has_texel_buffer : 1;
bool bbox_fence : 1;
bool should_apply_label : 1;
// clang-format on
};
u16 bits = 0;
void NewEncoder()
{
Flags reset_mask;
// Set the flags you *don't* want to reset
reset_mask.should_apply_label = true;
bits &= reset_mask.bits;
}
} m_flags;

/// Things that represent the state of the encoder
struct Current
{
NSString* label;
id<MTLRenderPipelineState> pipeline;
std::array<id<MTLBuffer>, 2> vertex_buffers;
std::array<id<MTLBuffer>, 2> fragment_buffers;
u32 width;
u32 height;
MathUtil::Rectangle<int> scissor_rect;
Util::Viewport viewport;
MTLDepthClipMode depth_clip_mode;
MTLCullMode cull_mode;
DepthStencilSelector depth_stencil;
PerfQueryGroup perf_query_group;
} m_current;
std::shared_ptr<PerfQueryTracker> m_current_perf_query;

/// Things that represent what we'd *like* to have on the encoder for the next draw
struct State
{
MathUtil::Rectangle<int> scissor_rect;
Util::Viewport viewport;
const Pipeline* render_pipeline = nullptr;
const ComputePipeline* compute_pipeline = nullptr;
std::array<id<MTLTexture>, 8> textures = {};
std::array<id<MTLSamplerState>, 8> samplers = {};
std::array<float, 8> sampler_min_lod;
std::array<float, 8> sampler_max_lod;
std::array<SamplerState, 8> sampler_states;
const Texture* compute_texture = nullptr;
std::unique_ptr<u8[]> utility_uniform;
u32 utility_uniform_size = 0;
u32 utility_uniform_capacity = 0;
id<MTLBuffer> bbox = nullptr;
id<MTLFence> bbox_upload_fence = nullptr;
id<MTLFence> bbox_download_fence = nullptr;
id<MTLBuffer> vertices = nullptr;
id<MTLBuffer> indices = nullptr;
id<MTLBuffer> texels = nullptr;
u32 texel_buffer_offset0;
u32 texel_buffer_offset1;
PerfQueryGroup perf_query_group = static_cast<PerfQueryGroup>(-1);
} m_state;

u32 m_perf_query_tracker_counter = 0;

std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
void SetSamplerForce(u32 idx, const SamplerState& sampler);
Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
void CheckViewport();
void CheckScissor();
void PrepareRender();
void PrepareCompute();
};

extern std::unique_ptr<StateTracker> g_state_tracker;
} // namespace Metal
840 changes: 840 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLStateTracker.mm

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLTexture.h
@@ -0,0 +1,77 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Metal/Metal.h>

#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/AbstractTexture.h"

#include "VideoBackends/Metal/MRCHelpers.h"

namespace Metal
{
class Texture final : public AbstractTexture
{
public:
explicit Texture(MRCOwned<id<MTLTexture>> tex, const TextureConfig& config);
~Texture();

void CopyRectangleFromTexture(const AbstractTexture* src,
const MathUtil::Rectangle<int>& src_rect, u32 src_layer,
u32 src_level, const MathUtil::Rectangle<int>& dst_rect,
u32 dst_layer, u32 dst_level) override;
void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& rect,
u32 layer, u32 level) override;
void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer,
size_t buffer_size) override;

id<MTLTexture> GetMTLTexture() const { return m_tex; }
void SetMTLTexture(MRCOwned<id<MTLTexture>> tex) { m_tex = std::move(tex); }

private:
MRCOwned<id<MTLTexture>> m_tex;
};

class StagingTexture final : public AbstractStagingTexture
{
public:
StagingTexture(MRCOwned<id<MTLBuffer>> buffer, StagingTextureType type,
const TextureConfig& config);
~StagingTexture();

void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& src_rect,
u32 src_layer, u32 src_level,
const MathUtil::Rectangle<int>& dst_rect) override;
void CopyToTexture(const MathUtil::Rectangle<int>& src_rect, AbstractTexture* dst,
const MathUtil::Rectangle<int>& dst_rect, u32 dst_layer,
u32 dst_level) override;

bool Map() override;
void Unmap() override;
void Flush() override;

private:
MRCOwned<id<MTLBuffer>> m_buffer;
MRCOwned<id<MTLCommandBuffer>> m_wait_buffer;
};

class Framebuffer final : public AbstractFramebuffer
{
public:
Framebuffer(AbstractTexture* color, AbstractTexture* depth, u32 width, u32 height, u32 layers,
u32 samples);
~Framebuffer();

id<MTLTexture> GetColor() const
{
return static_cast<Texture*>(GetColorAttachment())->GetMTLTexture();
}
id<MTLTexture> GetDepth() const
{
return static_cast<Texture*>(GetDepthAttachment())->GetMTLTexture();
}
};
} // namespace Metal
180 changes: 180 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLTexture.mm
@@ -0,0 +1,180 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Metal/MTLTexture.h"

#include "Common/Align.h"
#include "Common/Assert.h"

#include "VideoBackends/Metal/MTLStateTracker.h"

Metal::Texture::Texture(MRCOwned<id<MTLTexture>> tex, const TextureConfig& config)
: AbstractTexture(config), m_tex(std::move(tex))
{
}

Metal::Texture::~Texture()
{
if (g_state_tracker)
g_state_tracker->UnbindTexture(m_tex);
}

void Metal::Texture::CopyRectangleFromTexture(const AbstractTexture* src,
const MathUtil::Rectangle<int>& src_rect,
u32 src_layer, u32 src_level,
const MathUtil::Rectangle<int>& dst_rect,
u32 dst_layer, u32 dst_level)
{
g_state_tracker->EndRenderPass();
id<MTLTexture> msrc = static_cast<const Texture*>(src)->GetMTLTexture();
id<MTLBlitCommandEncoder> blit = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder];
MTLSize size = MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1);
[blit setLabel:@"Texture Copy"];
[blit copyFromTexture:msrc
sourceSlice:src_layer
sourceLevel:src_level
sourceOrigin:MTLOriginMake(src_rect.left, src_rect.top, 0)
sourceSize:size
toTexture:m_tex
destinationSlice:dst_layer
destinationLevel:dst_level
destinationOrigin:MTLOriginMake(dst_rect.left, dst_rect.top, 0)];
[blit endEncoding];
}

void Metal::Texture::ResolveFromTexture(const AbstractTexture* src,
const MathUtil::Rectangle<int>& rect, u32 layer, u32 level)
{
ASSERT(rect == MathUtil::Rectangle<int>(0, 0, src->GetWidth(), src->GetHeight()));
id<MTLTexture> src_tex = static_cast<const Texture*>(src)->GetMTLTexture();
g_state_tracker->ResolveTexture(src_tex, m_tex, layer, level);
}

void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, //
const u8* buffer, size_t buffer_size)
{
@autoreleasepool
{
const u32 block_size = GetBlockSizeForFormat(GetFormat());
const u32 num_rows = Common::AlignUp(height, block_size) / block_size;
const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length);
const u32 upload_size = source_pitch * num_rows;
StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::TextureData,
upload_size, StateTracker::AlignMask::Other);
memcpy(map.cpu_buffer, buffer, upload_size);
id<MTLBlitCommandEncoder> encoder = g_state_tracker->GetTextureUploadEncoder();
[encoder copyFromBuffer:map.gpu_buffer
sourceOffset:map.gpu_offset
sourceBytesPerRow:source_pitch
sourceBytesPerImage:upload_size
sourceSize:MTLSizeMake(width, height, 1)
toTexture:m_tex
destinationSlice:0
destinationLevel:level
destinationOrigin:MTLOriginMake(0, 0, 0)];
}
}

Metal::StagingTexture::StagingTexture(MRCOwned<id<MTLBuffer>> buffer, StagingTextureType type,
const TextureConfig& config)
: AbstractStagingTexture(type, config), m_buffer(std::move(buffer))
{
m_map_pointer = static_cast<char*>([m_buffer contents]);
m_map_stride = config.GetStride();
}

Metal::StagingTexture::~StagingTexture() = default;

void Metal::StagingTexture::CopyFromTexture(const AbstractTexture* src,
const MathUtil::Rectangle<int>& src_rect, //
u32 src_layer, u32 src_level,
const MathUtil::Rectangle<int>& dst_rect)
{
@autoreleasepool
{
const size_t stride = m_config.GetStride();
const u32 offset = dst_rect.top * stride + dst_rect.left * m_texel_size;
const MTLSize size =
MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1);
g_state_tracker->EndRenderPass();
m_wait_buffer = MRCRetain(g_state_tracker->GetRenderCmdBuf());
id<MTLBlitCommandEncoder> download_encoder = [m_wait_buffer blitCommandEncoder];
[download_encoder setLabel:@"Texture Download"];
[download_encoder copyFromTexture:static_cast<const Texture*>(src)->GetMTLTexture()
sourceSlice:src_layer
sourceLevel:src_level
sourceOrigin:MTLOriginMake(src_rect.left, src_rect.top, 0)
sourceSize:size
toBuffer:m_buffer
destinationOffset:offset
destinationBytesPerRow:stride
destinationBytesPerImage:stride * size.height];
[download_encoder endEncoding];
m_needs_flush = true;
}
}

void Metal::StagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect, //
AbstractTexture* dst,
const MathUtil::Rectangle<int>& dst_rect, //
u32 dst_layer, u32 dst_level)
{
@autoreleasepool
{
const size_t stride = m_config.GetStride();
const u32 offset = dst_rect.top * stride + dst_rect.left * m_texel_size;
const MTLSize size =
MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1);
g_state_tracker->EndRenderPass();
m_wait_buffer = MRCRetain(g_state_tracker->GetRenderCmdBuf());
id<MTLBlitCommandEncoder> upload_encoder = [m_wait_buffer blitCommandEncoder];
[upload_encoder setLabel:@"Texture Upload"];
[upload_encoder copyFromBuffer:m_buffer
sourceOffset:offset
sourceBytesPerRow:stride
sourceBytesPerImage:stride * size.height
sourceSize:size
toTexture:static_cast<Texture*>(dst)->GetMTLTexture()
destinationSlice:dst_layer
destinationLevel:dst_level
destinationOrigin:MTLOriginMake(dst_rect.left, dst_rect.top, 0)];
[upload_encoder endEncoding];
m_needs_flush = true;
}
}

bool Metal::StagingTexture::Map()
{
// Always mapped
return true;
}

void Metal::StagingTexture::Unmap()
{
// Always mapped
}

void Metal::StagingTexture::Flush()
{
m_needs_flush = false;
if (!m_wait_buffer)
return;
if ([m_wait_buffer status] != MTLCommandBufferStatusCompleted)
{
// Flush while we wait, since who knows how long we'll be sitting here
g_state_tracker->FlushEncoders();
[m_wait_buffer waitUntilCompleted];
}
m_wait_buffer = nullptr;
}

Metal::Framebuffer::Framebuffer(AbstractTexture* color, AbstractTexture* depth, //
u32 width, u32 height, u32 layers, u32 samples)
: AbstractFramebuffer(color, depth,
color ? color->GetFormat() : AbstractTextureFormat::Undefined, //
depth ? depth->GetFormat() : AbstractTextureFormat::Undefined, //
width, height, layers, samples)
{
}

Metal::Framebuffer::~Framebuffer() = default;
53 changes: 53 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLUtil.h
@@ -0,0 +1,53 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Metal/Metal.h>
#include <vector>

#include "VideoCommon/AbstractShader.h"
#include "VideoCommon/TextureConfig.h"
#include "VideoCommon/VideoConfig.h"

#include "VideoBackends/Metal/MRCHelpers.h"

namespace Metal
{
struct DeviceFeatures
{
bool subgroup_ops;
};

extern DeviceFeatures g_features;

namespace Util
{
struct Viewport
{
float x;
float y;
float width;
float height;
float near_depth;
float far_depth;
};

/// Gets the list of Metal devices, ordered so the system default device is first
std::vector<MRCOwned<id<MTLDevice>>> GetAdapterList();
void PopulateBackendInfo(VideoConfig* config);
void PopulateBackendInfoAdapters(VideoConfig* config,
const std::vector<MRCOwned<id<MTLDevice>>>& adapters);
void PopulateBackendInfoFeatures(VideoConfig* config, id<MTLDevice> device);

AbstractTextureFormat ToAbstract(MTLPixelFormat format);
MTLPixelFormat FromAbstract(AbstractTextureFormat format);
static inline bool HasStencil(AbstractTextureFormat format)
{
return format == AbstractTextureFormat::D24_S8 || format == AbstractTextureFormat::D32F_S8;
}

std::optional<std::string> TranslateShaderToMSL(ShaderStage stage, std::string_view source);

} // namespace Util
} // namespace Metal
464 changes: 464 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLUtil.mm

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLVertexFormat.h
@@ -0,0 +1,23 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <Metal/Metal.h>

#include "VideoBackends/Metal/MRCHelpers.h"

#include "VideoCommon/NativeVertexFormat.h"

namespace Metal
{
class VertexFormat : public NativeVertexFormat
{
public:
VertexFormat(const PortableVertexDeclaration& vtx_decl);

MTLVertexDescriptor* Get() const { return m_desc; }

MRCOwned<MTLVertexDescriptor*> m_desc;
};
} // namespace Metal
143 changes: 143 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLVertexFormat.mm
@@ -0,0 +1,143 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Metal/MTLVertexFormat.h"

#include "VideoCommon/VertexShaderGen.h"

static MTLVertexFormat ConvertFormat(ComponentFormat format, int count, bool int_format)
{
// clang-format off
if (int_format)
{
switch (format)
{
case ComponentFormat::UByte:
switch (count)
{
case 1: return MTLVertexFormatUChar;
case 2: return MTLVertexFormatUChar2;
case 3: return MTLVertexFormatUChar3;
case 4: return MTLVertexFormatUChar4;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Byte:
switch (count)
{
case 1: return MTLVertexFormatChar;
case 2: return MTLVertexFormatChar2;
case 3: return MTLVertexFormatChar3;
case 4: return MTLVertexFormatChar4;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::UShort:
switch (count)
{
case 1: return MTLVertexFormatUShort;
case 2: return MTLVertexFormatUShort2;
case 3: return MTLVertexFormatUShort3;
case 4: return MTLVertexFormatUShort4;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Short:
switch (count)
{
case 1: return MTLVertexFormatShort;
case 2: return MTLVertexFormatShort2;
case 3: return MTLVertexFormatShort3;
case 4: return MTLVertexFormatShort4;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Float:
switch (count)
{
case 1: return MTLVertexFormatFloat;
case 2: return MTLVertexFormatFloat2;
case 3: return MTLVertexFormatFloat3;
case 4: return MTLVertexFormatFloat4;
default: return MTLVertexFormatInvalid;
}
}
}
else
{
switch (format)
{
case ComponentFormat::UByte:
switch (count)
{
case 1: return MTLVertexFormatUCharNormalized;
case 2: return MTLVertexFormatUChar2Normalized;
case 3: return MTLVertexFormatUChar3Normalized;
case 4: return MTLVertexFormatUChar4Normalized;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Byte:
switch (count)
{
case 1: return MTLVertexFormatCharNormalized;
case 2: return MTLVertexFormatChar2Normalized;
case 3: return MTLVertexFormatChar3Normalized;
case 4: return MTLVertexFormatChar4Normalized;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::UShort:
switch (count)
{
case 1: return MTLVertexFormatUShortNormalized;
case 2: return MTLVertexFormatUShort2Normalized;
case 3: return MTLVertexFormatUShort3Normalized;
case 4: return MTLVertexFormatUShort4Normalized;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Short:
switch (count)
{
case 1: return MTLVertexFormatShortNormalized;
case 2: return MTLVertexFormatShort2Normalized;
case 3: return MTLVertexFormatShort3Normalized;
case 4: return MTLVertexFormatShort4Normalized;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Float:
switch (count)
{
case 1: return MTLVertexFormatFloat;
case 2: return MTLVertexFormatFloat2;
case 3: return MTLVertexFormatFloat3;
case 4: return MTLVertexFormatFloat4;
default: return MTLVertexFormatInvalid;
}
}
}
// clang-format on
}

static void SetAttribute(MTLVertexDescriptor* desc, u32 attribute, const AttributeFormat& format)
{
if (!format.enable)
return;
MTLVertexAttributeDescriptor* attr_desc = [[desc attributes] objectAtIndexedSubscript:attribute];
[attr_desc setFormat:ConvertFormat(format.type, format.components, format.integer)];
[attr_desc setOffset:format.offset];
[attr_desc setBufferIndex:0];
}

template <size_t N>
static void SetAttributes(MTLVertexDescriptor* desc, u32 attribute,
const AttributeFormat (&format)[N])
{
for (size_t i = 0; i < N; ++i)
SetAttribute(desc, attribute + i, format[i]);
}

Metal::VertexFormat::VertexFormat(const PortableVertexDeclaration& vtx_decl)
: NativeVertexFormat(vtx_decl), m_desc(MRCTransfer([MTLVertexDescriptor new]))
{
[[[m_desc layouts] objectAtIndexedSubscript:0] setStride:vtx_decl.stride];
SetAttribute(m_desc, SHADER_POSITION_ATTRIB, vtx_decl.position);
SetAttributes(m_desc, SHADER_NORMAL_ATTRIB, vtx_decl.normals);
SetAttributes(m_desc, SHADER_COLOR0_ATTRIB, vtx_decl.colors);
SetAttributes(m_desc, SHADER_TEXTURE0_ATTRIB, vtx_decl.texcoords);
SetAttribute(m_desc, SHADER_POSMTX_ATTRIB, vtx_decl.posmtx);
}
34 changes: 34 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLVertexManager.h
@@ -0,0 +1,34 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "VideoBackends/Metal/MTLUtil.h"
#include "VideoCommon/VertexManagerBase.h"

namespace Metal
{
class VertexManager final : public VertexManagerBase
{
public:
VertexManager();
~VertexManager() override;

void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override;
bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset) override;
bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset,
const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format, u32* out_palette_offset) override;

protected:
void ResetBuffer(u32 vertex_stride) override;
void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex,
u32* out_base_index) override;
void UploadUniforms() override;

private:
u32 m_vertex_offset;
u32 m_base_vertex;
};
} // namespace Metal
94 changes: 94 additions & 0 deletions Source/Core/VideoBackends/Metal/MTLVertexManager.mm
@@ -0,0 +1,94 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Metal/MTLVertexManager.h"

#include "VideoBackends/Metal/MTLStateTracker.h"

#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexShaderManager.h"

Metal::VertexManager::VertexManager()
{
}

Metal::VertexManager::~VertexManager() = default;

void Metal::VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
{
g_state_tracker->SetUtilityUniform(uniforms, uniforms_size);
}

bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size,
TexelBufferFormat format, u32* out_offset)
{
*out_offset = 0;
StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Texels, data_size,
StateTracker::AlignMask::Other);
memcpy(map.cpu_buffer, data, data_size);
g_state_tracker->SetTexelBuffer(map.gpu_buffer, map.gpu_offset, 0);
return true;
}

bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size,
TexelBufferFormat format, u32* out_offset,
const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format,
u32* out_palette_offset)
{
*out_offset = 0;
*out_palette_offset = 0;

const u32 aligned_data_size = g_state_tracker->Align(data_size, StateTracker::AlignMask::Other);
const u32 total_size = aligned_data_size + palette_size;
StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Texels, total_size,
StateTracker::AlignMask::Other);
memcpy(map.cpu_buffer, data, data_size);
memcpy(static_cast<char*>(map.cpu_buffer) + aligned_data_size, palette_data, palette_size);
g_state_tracker->SetTexelBuffer(map.gpu_buffer, map.gpu_offset,
map.gpu_offset + aligned_data_size);
return true;
}

void Metal::VertexManager::ResetBuffer(u32 vertex_stride)
{
const u32 max_vertex_size = 65535 * vertex_stride;
const u32 vertex_alloc = max_vertex_size + vertex_stride - 1; // for alignment
auto vertex = g_state_tracker->Preallocate(StateTracker::UploadBuffer::Vertex, vertex_alloc);
auto index =
g_state_tracker->Preallocate(StateTracker::UploadBuffer::Index, MAXIBUFFERSIZE * sizeof(u16));

// Align the base vertex
m_base_vertex = (vertex.second + vertex_stride - 1) / vertex_stride;
m_vertex_offset = m_base_vertex * vertex_stride - vertex.second;
m_cur_buffer_pointer = m_base_buffer_pointer = static_cast<u8*>(vertex.first) + m_vertex_offset;
m_end_buffer_pointer = m_base_buffer_pointer + max_vertex_size;
m_index_generator.Start(static_cast<u16*>(index.first));
}

void Metal::VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
u32* out_base_vertex, u32* out_base_index)
{
const u32 vsize = num_vertices * vertex_stride + m_vertex_offset;
const u32 isize = num_indices * sizeof(u16);
StateTracker::Map vmap = g_state_tracker->CommitPreallocation(
StateTracker::UploadBuffer::Vertex, vsize, StateTracker::AlignMask::None);
StateTracker::Map imap = g_state_tracker->CommitPreallocation(
StateTracker::UploadBuffer::Index, isize, StateTracker::AlignMask::None);

ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, vsize);
ADDSTAT(g_stats.this_frame.bytes_index_streamed, isize);

DEBUG_ASSERT(vmap.gpu_offset + m_vertex_offset == m_base_vertex * vertex_stride);
g_state_tracker->SetVerticesAndIndices(vmap.gpu_buffer, imap.gpu_buffer);
*out_base_vertex = m_base_vertex;
*out_base_index = imap.gpu_offset / sizeof(u16);
}

void Metal::VertexManager::UploadUniforms()
{
g_state_tracker->InvalidateUniforms(VertexShaderManager::dirty, PixelShaderManager::dirty);
VertexShaderManager::dirty = false;
PixelShaderManager::dirty = false;
}
27 changes: 27 additions & 0 deletions Source/Core/VideoBackends/Metal/VideoBackend.h
@@ -0,0 +1,27 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <string>
#include "VideoCommon/VideoBackendBase.h"

namespace Metal
{
class VideoBackend : public VideoBackendBase
{
public:
bool Initialize(const WindowSystemInfo& wsi) override;
void Shutdown() override;

std::string GetName() const override;
std::string GetDisplayName() const override;
std::optional<std::string> GetWarningMessage() const override;

void InitBackendInfo() override;

void PrepareWindow(WindowSystemInfo& wsi) override;

static constexpr const char* NAME = "Metal";
};
} // namespace Metal
1 change: 1 addition & 0 deletions Source/Core/VideoBackends/Null/NullBackend.cpp
Expand Up @@ -59,6 +59,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsTextureQueryLevels = false;
g_Config.backend_info.bSupportsLodBiasInSampler = false;
g_Config.backend_info.bSupportsSettingObjectNames = false;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;

// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoBackends/OGL/OGLMain.cpp
Expand Up @@ -93,6 +93,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsShaderBinaries = false;
g_Config.backend_info.bSupportsPipelineCacheData = false;
g_Config.backend_info.bSupportsLodBiasInSampler = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;

// TODO: There is a bug here, if texel buffers or SSBOs/atomics are not supported the graphics
// options will show the option when it is not supported. The only way around this would be
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/OGL/OGLRender.cpp
Expand Up @@ -903,8 +903,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
}
}

void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
glUseProgram(static_cast<const OGLShader*>(shader)->GetGLComputeProgramID());
glDispatchCompute(groups_x, groups_y, groups_z);
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/OGL/OGLRender.h
Expand Up @@ -125,8 +125,8 @@ class Renderer : public ::Renderer
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;

Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoBackends/Software/SWmain.cpp
Expand Up @@ -88,6 +88,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsTextureQueryLevels = false;
g_Config.backend_info.bSupportsLodBiasInSampler = false;
g_Config.backend_info.bSupportsSettingObjectNames = false;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;

// aamodes
g_Config.backend_info.AAModes = {1};
Expand Down
6 changes: 3 additions & 3 deletions Source/Core/VideoBackends/Vulkan/VKRenderer.cpp
Expand Up @@ -155,7 +155,7 @@ void Renderer::ClearScreen(const MathUtil::Rectangle<int>& rc, bool color_enable
bpmem.zcontrol.pixel_format == PixelFormat::RGB8_Z24 ||
bpmem.zcontrol.pixel_format == PixelFormat::Z24)
{
// Force alpha writes, and clear the alpha channel. This is different to the other backends,
// Force alpha writes, and clear the alpha channel. This is different from the other backends,
// where the existing values of the alpha channel are preserved.
alpha_enable = true;
color &= 0x00FFFFFF;
Expand Down Expand Up @@ -641,8 +641,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
base_vertex, 0);
}

void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
StateTracker::GetInstance()->SetComputeShader(static_cast<const VKShader*>(shader));
if (StateTracker::GetInstance()->BindCompute())
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/Vulkan/VKRenderer.h
Expand Up @@ -77,8 +77,8 @@ class Renderer : public ::Renderer
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;
void SetFullscreen(bool enable_fullscreen) override;
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
Expand Up @@ -294,6 +294,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsTextureQueryLevels = true; // Assumed support.
config->backend_info.bSupportsLodBiasInSampler = false; // Dependent on OS.
config->backend_info.bSupportsSettingObjectNames = false; // Dependent on features.
config->backend_info.bSupportsPartialMultisampleResolve = true; // Assumed support.
}

void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list)
Expand Down
10 changes: 10 additions & 0 deletions Source/Core/VideoCommon/DriverDetails.cpp
Expand Up @@ -98,6 +98,8 @@ constexpr BugInfo m_known_bugs[] = {
BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_INTEL, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_INTEL, DRIVER_APPLE, Family::UNKNOWN,
BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true},
{API_OPENGL, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN,
BUG_BROKEN_BITWISE_OP_NEGATION, -1.0, 108.4693462, true},
{API_VULKAN, OS_WINDOWS, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_PRIMITIVE_RESTART, -1.0,
Expand All @@ -120,6 +122,8 @@ constexpr BugInfo m_known_bugs[] = {
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_ALL, DRIVER_APPLE, Family::UNKNOWN, BUG_BROKEN_REVERSED_DEPTH_RANGE,
-1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_SLOW_CACHED_READBACK_MEMORY,
-1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
Expand All @@ -130,6 +134,8 @@ constexpr BugInfo m_known_bugs[] = {
-1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_ATI, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_ATI, DRIVER_APPLE, Family::UNKNOWN,
BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true},
{API_OPENGL, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN,
BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true},
{API_VULKAN, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN,
Expand All @@ -140,8 +146,12 @@ constexpr BugInfo m_known_bugs[] = {
-1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_APPLE, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_DISCARD_WITH_EARLY_Z, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_APPLE, DRIVER_APPLE, Family::UNKNOWN,
BUG_BROKEN_DISCARD_WITH_EARLY_Z, -1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_INTEL, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_INTEL, DRIVER_APPLE, Family::UNKNOWN,
BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true},
};

static std::map<Bug, BugInfo> m_bugs;
Expand Down
4 changes: 3 additions & 1 deletion Source/Core/VideoCommon/DriverDetails.h
Expand Up @@ -13,7 +13,8 @@ namespace DriverDetails
enum API
{
API_OPENGL = (1 << 0),
API_VULKAN = (1 << 1)
API_VULKAN = (1 << 1),
API_METAL = (1 << 2),
};

// Enum of supported operating systems
Expand Down Expand Up @@ -64,6 +65,7 @@ enum Driver
DRIVER_IMGTEC, // Official PowerVR driver
DRIVER_VIVANTE, // Official Vivante driver
DRIVER_PORTABILITY, // Vulkan via Metal on macOS
DRIVER_APPLE, // Metal on macOS
DRIVER_UNKNOWN // Unknown driver, default to official hardware driver
};

Expand Down
52 changes: 47 additions & 5 deletions Source/Core/VideoCommon/FramebufferManager.cpp
Expand Up @@ -188,12 +188,23 @@ bool FramebufferManager::CreateEFBFramebuffer()
// Create resolved textures if MSAA is on
if (g_ActiveConfig.MultisamplingEnabled())
{
u32 flags = 0;
if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve)
flags |= AbstractTextureFlag_RenderTarget;
m_efb_resolve_color_texture = g_renderer->CreateTexture(
TextureConfig(efb_color_texture_config.width, efb_color_texture_config.height, 1,
efb_color_texture_config.layers, 1, efb_color_texture_config.format, 0),
efb_color_texture_config.layers, 1, efb_color_texture_config.format, flags),
"EFB color resolve texture");
if (!m_efb_resolve_color_texture)
return false;

if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve)
{
m_efb_color_resolve_framebuffer =
g_renderer->CreateFramebuffer(m_efb_resolve_color_texture.get(), nullptr);
if (!m_efb_color_resolve_framebuffer)
return false;
}
}

// We also need one to convert the D24S8 to R32F if that is being used (Adreno).
Expand Down Expand Up @@ -248,12 +259,27 @@ AbstractTexture* FramebufferManager::ResolveEFBColorTexture(const MathUtil::Rect
clamped_region.ClampUL(0, 0, GetEFBWidth(), GetEFBHeight());

// Resolve to our already-created texture.
for (u32 layer = 0; layer < GetEFBLayers(); layer++)
if (g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve)
{
m_efb_resolve_color_texture->ResolveFromTexture(m_efb_color_texture.get(), clamped_region,
layer, 0);
for (u32 layer = 0; layer < GetEFBLayers(); layer++)
{
m_efb_resolve_color_texture->ResolveFromTexture(m_efb_color_texture.get(), clamped_region,
layer, 0);
}
}
else
{
m_efb_color_texture->FinishedRendering();
g_renderer->BeginUtilityDrawing();
g_renderer->SetAndDiscardFramebuffer(m_efb_color_resolve_framebuffer.get());
g_renderer->SetPipeline(m_efb_color_resolve_pipeline.get());
g_renderer->SetTexture(0, m_efb_color_texture.get());
g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState());
g_renderer->SetViewportAndScissor(clamped_region);
g_renderer->Draw(0, 3);
m_efb_resolve_color_texture->FinishedRendering();
g_renderer->EndUtilityDrawing();
}

m_efb_resolve_color_texture->FinishedRendering();
return m_efb_resolve_color_texture.get();
}
Expand Down Expand Up @@ -487,6 +513,22 @@ bool FramebufferManager::CompileReadbackPipelines()
m_efb_depth_resolve_pipeline = g_renderer->CreatePipeline(config);
if (!m_efb_depth_resolve_pipeline)
return false;

if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve)
{
config.framebuffer_state.color_texture_format = GetEFBColorFormat();
auto color_resolve_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Pixel,
FramebufferShaderGen::GenerateResolveColorPixelShader(GetEFBSamples()),
"Color resolve pixel shader");
if (!color_resolve_shader)
return false;

config.pixel_shader = color_resolve_shader.get();
m_efb_color_resolve_pipeline = g_renderer->CreatePipeline(config);
if (!m_efb_color_resolve_pipeline)
return false;
}
}

// EFB restore pipeline
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoCommon/FramebufferManager.h
Expand Up @@ -170,7 +170,9 @@ class FramebufferManager final

std::unique_ptr<AbstractFramebuffer> m_efb_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_efb_convert_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_efb_color_resolve_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_efb_depth_resolve_framebuffer;
std::unique_ptr<AbstractPipeline> m_efb_color_resolve_pipeline;
std::unique_ptr<AbstractPipeline> m_efb_depth_resolve_pipeline;

// Pipeline for restoring the contents of the EFB from a save state
Expand Down
21 changes: 21 additions & 0 deletions Source/Core/VideoCommon/FramebufferShaderGen.cpp
Expand Up @@ -34,6 +34,7 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{
Expand All @@ -55,6 +56,7 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
code.Write("texture(samp{}, {})", n, coords);
Expand All @@ -72,6 +74,7 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
Expand All @@ -89,6 +92,7 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{
Expand Down Expand Up @@ -138,6 +142,7 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{
Expand Down Expand Up @@ -333,6 +338,22 @@ std::string GenerateColorPixelShader()
return code.GetBuffer();
}

std::string GenerateResolveColorPixelShader(u32 samples)
{
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, true);
EmitPixelMainDeclaration(code, 1, 0);
code.Write("{{\n"
" int layer = int(v_tex0.z);\n"
" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"
" ocol0 = float4(0.0f);\n");
code.Write(" for (int i = 0; i < {}; i++)\n", samples);
code.Write(" ocol0 += texelFetch(samp0, coords, i);\n");
code.Write(" ocol0 /= {}.0f;\n", samples);
code.Write("}}\n");
return code.GetBuffer();
}

std::string GenerateResolveDepthPixelShader(u32 samples)
{
ShaderCode code;
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoCommon/FramebufferShaderGen.h
Expand Up @@ -15,6 +15,7 @@ std::string GenerateScreenQuadVertexShader();
std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors);
std::string GenerateTextureCopyVertexShader();
std::string GenerateTextureCopyPixelShader();
std::string GenerateResolveColorPixelShader(u32 samples);
std::string GenerateResolveDepthPixelShader(u32 samples);
std::string GenerateClearVertexShader();
std::string GenerateEFBPokeVertexShader();
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/PixelShaderGen.cpp
Expand Up @@ -810,7 +810,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it.
if (api_type == APIType::Vulkan)
if (api_type == APIType::Vulkan || api_type == APIType::Metal)
{
if (!uid_data->no_dual_src)
{
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoCommon/RenderBase.h
Expand Up @@ -113,8 +113,8 @@ class Renderer
virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {}

// Dispatching compute shaders with currently-bound state.
virtual void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
virtual void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
}

Expand Down
3 changes: 3 additions & 0 deletions Source/Core/VideoCommon/ShaderGenCommon.cpp
Expand Up @@ -60,6 +60,9 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
case APIType::D3D:
filename += "D3D";
break;
case APIType::Metal:
filename += "Metal";
break;
case APIType::OpenGL:
filename += "OpenGL";
break;
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/Spirv.cpp
Expand Up @@ -55,7 +55,7 @@ CompileShaderToSPV(EShLanguage stage, APIType api_type,
glslang::TShader::ForbidIncluder includer;
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules);
if (api_type == APIType::Vulkan)
if (api_type == APIType::Vulkan || api_type == APIType::Metal)
messages = static_cast<EShMessages>(messages | EShMsgVulkanRules);
int default_version = 450;

Expand Down
3 changes: 2 additions & 1 deletion Source/Core/VideoCommon/TextureCacheBase.cpp
Expand Up @@ -2924,7 +2924,8 @@ bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, con

auto dispatch_groups =
TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height);
g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1);
g_renderer->DispatchComputeShader(shader, info->group_size_x, info->group_size_y, 1,
dispatch_groups.first, dispatch_groups.second, 1);

// Copy from decoding texture -> final texture
// This is because we don't want to have to create compute view for every layer
Expand Down
112 changes: 88 additions & 24 deletions Source/Core/VideoCommon/TextureConversionShader.cpp
Expand Up @@ -519,10 +519,49 @@ UBO_BINDING(std140, 1) uniform UBO {
uint u_palette_offset;
};
#if defined(API_METAL)
#if defined(TEXEL_BUFFER_FORMAT_R8)
SSBO_BINDING(0) readonly buffer Input { uint8_t s_input_buffer[]; };
#define FETCH(offset) uint(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_R16)
SSBO_BINDING(0) readonly buffer Input { uint16_t s_input_buffer[]; };
#define FETCH(offset) uint(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_RGBA8)
SSBO_BINDING(0) readonly buffer Input { u8vec4 s_input_buffer[]; };
#define FETCH(offset) uvec4(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_R32G32)
SSBO_BINDING(0) readonly buffer Input { uvec2 s_input_buffer[]; };
#define FETCH(offset) s_input_buffer[offset]
#else
#error No texel buffer?
#endif
#ifdef HAS_PALETTE
SSBO_BINDING(1) readonly buffer Palette { uint16_t s_palette_buffer[]; };
#define FETCH_PALETTE(offset) uint(s_palette_buffer[offset])
#endif
#else
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
#if defined(TEXEL_BUFFER_FORMAT_R8) || defined(TEXEL_BUFFER_FORMAT_R16)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).r
#elif defined(TEXEL_BUFFER_FORMAT_RGBA8)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset))
#elif defined(TEXEL_BUFFER_FORMAT_R32G32)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).rg
#else
#error No texel buffer?
#endif
#ifdef HAS_PALETTE
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
#define FETCH_PALETTE(offset) texelFetch(s_palette_buffer, int((offset) + u_palette_offset)).r
#endif
#endif // defined(API_METAL)
IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
#define GROUP_MEMORY_BARRIER_WITH_SYNC memoryBarrierShared(); barrier();
Expand Down Expand Up @@ -563,7 +602,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords)
{
uint2 block = coords / block_size;
uint2 offset = coords % block_size;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * (block_size.x * block_size.y);
buffer_pos += offset.y * block_size.x;
Expand All @@ -575,7 +614,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords)
uint4 GetPaletteColor(uint index)
{
// Fetch and swap BE to LE.
uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x);
uint val = Swap16(FETCH_PALETTE(index));
uint4 color;
#if defined(PALETTE_FORMAT_IA8)
Expand Down Expand Up @@ -633,14 +672,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// the size of the buffer elements.
uint2 block = coords.xy / 8u;
uint2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint i;
if ((coords.x & 1u) == 0u)
i = Convert4To8((val >> 4));
Expand All @@ -663,7 +702,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint i = Convert4To8((val & 0x0Fu));
uint a = Convert4To8((val >> 4));
uint4 color = uint4(i, i, i, a);
Expand All @@ -681,7 +720,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint i = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint i = FETCH(buffer_pos);
uint4 color = uint4(i, i, i, i);
float4 norm_color = float4(color) / 255.0;
Expand All @@ -697,7 +736,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint a = (val & 0xFFu);
uint i = (val >> 8);
uint4 color = uint4(i, i, i, a);
Expand All @@ -714,7 +753,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uint val = Swap16(FETCH(buffer_pos));
uint4 color;
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
Expand All @@ -736,7 +775,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uint val = Swap16(FETCH(buffer_pos));
uint4 color;
if ((val & 0x8000u) != 0u)
Expand Down Expand Up @@ -771,7 +810,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// for the entire block, then the GB channels afterwards.
uint2 block = coords.xy / 4u;
uint2 offset = coords.xy % 4u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
// Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes.
buffer_pos += block.y * u_src_row_stride;
Expand All @@ -780,8 +819,8 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
buffer_pos += offset.x;
// The two GB channels follow after the block's AR channels.
uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x;
uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x;
uint val1 = FETCH(buffer_pos + 0u);
uint val2 = FETCH(buffer_pos + 16u);
uint4 color;
color.a = (val1 & 0xFFu);
Expand Down Expand Up @@ -835,14 +874,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Calculate tiled block coordinates.
uint2 tile_block_coords = block_coords / 2u;
uint2 subtile_block_coords = block_coords % 2u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += tile_block_coords.y * u_src_row_stride;
buffer_pos += tile_block_coords.x * 4u;
buffer_pos += subtile_block_coords.y * 2u;
buffer_pos += subtile_block_coords.x;
// Read the entire DXT block to shared memory.
uint2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
uint2 raw_data = FETCH(buffer_pos);
shared_temp[block_in_group] = raw_data;
}
Expand Down Expand Up @@ -921,14 +960,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// the size of the buffer elements.
uint2 block = coords.xy / 8u;
uint2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
Expand All @@ -945,7 +984,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint index = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint index = FETCH(buffer_pos);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
Expand All @@ -960,7 +999,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu;
uint index = Swap16(FETCH(buffer_pos)) & 0x3FFFu;
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
Expand All @@ -976,8 +1015,8 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
DEFINE_MAIN(8, 8)
{
uint2 uv = gl_GlobalInvocationID.xy;
int buffer_pos = int(u_src_offset + (uv.y * u_src_row_stride) + (uv.x / 2u));
float4 yuyv = float4(texelFetch(s_input_buffer, buffer_pos));
uint buffer_pos = (uv.y * u_src_row_stride) + (uv.x / 2u);
float4 yuyv = float4(FETCH(buffer_pos));
float y = (uv.x & 1u) != 0u ? yuyv.b : yuyv.r;
Expand Down Expand Up @@ -1034,6 +1073,25 @@ std::string GenerateDecodingShader(TextureFormat format, std::optional<TLUTForma
}
}

switch (info->buffer_format)
{
case TEXEL_BUFFER_FORMAT_R8_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R8 1\n";
break;
case TEXEL_BUFFER_FORMAT_R16_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R16 1\n";
break;
case TEXEL_BUFFER_FORMAT_RGBA8_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_RGBA8 1\n";
break;
case TEXEL_BUFFER_FORMAT_R32G32_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R32G32 1\n";
break;
case NUM_TEXEL_BUFFER_FORMATS:
ASSERT(0);
break;
}

ss << decoding_shader_header;
ss << info->shader_body;

Expand Down Expand Up @@ -1121,7 +1179,10 @@ float4 DecodePixel(int val)

ss << "\n";

ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
if (api_type == APIType::Metal)
ss << "SSBO_BINDING(0) readonly buffer Palette { uint16_t palette[]; };\n";
else
ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n";
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";

Expand All @@ -1143,9 +1204,12 @@ float4 DecodePixel(int val)
ss << "void main() {\n";
ss << " float3 coords = v_tex0;\n";
ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n";
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";
if (api_type == APIType::Metal)
ss << " src = int(palette[uint(src)]);\n";
else
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";

ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n";
ss << " src = ((src << 8) | (src >> 8)) & 0xFFFF;\n";
ss << " ocol0 = DecodePixel(src);\n";
ss << "}\n";

Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/UberShaderPixel.cpp
Expand Up @@ -82,7 +82,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it.
if (api_type == APIType::Vulkan)
if (api_type == APIType::Vulkan || api_type == APIType::Metal)
{
if (use_dual_source)
{
Expand Down
4 changes: 4 additions & 0 deletions Source/Core/VideoCommon/VideoBackendBase.cpp
Expand Up @@ -35,6 +35,9 @@
#ifdef HAS_VULKAN
#include "VideoBackends/Vulkan/VideoBackend.h"
#endif
#ifdef __APPLE__
#include "VideoBackends/Metal/VideoBackend.h"
#endif

#include "VideoCommon/AsyncRequests.h"
#include "VideoCommon/BPStructs.h"
Expand Down Expand Up @@ -227,6 +230,7 @@ const std::vector<std::unique_ptr<VideoBackendBase>>& VideoBackendBase::GetAvail
#ifdef __APPLE__
// Emplace the Vulkan backend at the beginning so it takes precedence over OpenGL.
backends.emplace(backends.begin(), std::make_unique<Vulkan::VideoBackend>());
backends.push_back(std::make_unique<Metal::VideoBackend>());
#else
backends.push_back(std::make_unique<Vulkan::VideoBackend>());
#endif
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoCommon/VideoCommon.h
Expand Up @@ -39,6 +39,7 @@ enum class APIType
OpenGL,
D3D,
Vulkan,
Metal,
Nothing
};

Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoCommon/VideoConfig.h
Expand Up @@ -238,6 +238,7 @@ struct VideoConfig final
bool bSupportsTextureQueryLevels = false;
bool bSupportsLodBiasInSampler = false;
bool bSupportsSettingObjectNames = false;
bool bSupportsPartialMultisampleResolve = false;
} backend_info;

// Utility
Expand Down