[Impeller] Encode render passes concurrently on iOS. #42028

Merged
merged 23 commits into from May 23, 2023
Changes from 8 commits
6 changes: 1 addition & 5 deletions impeller/entity/contents/content_context.cc
@@ -370,11 +370,7 @@ std::shared_ptr<Texture> ContentContext::MakeSubpass(
return nullptr;
}

if (!sub_renderpass->EncodeCommands()) {
return nullptr;
}

if (!sub_command_buffer->SubmitCommands()) {
if (!sub_command_buffer->SubmitCommandsAsync(std::move(sub_renderpass))) {
return nullptr;
}

12 changes: 3 additions & 9 deletions impeller/entity/inline_pass_context.cc
@@ -54,15 +54,9 @@ bool InlinePassContext::EndPass() {
return true;
}

if (!pass_->EncodeCommands()) {
VALIDATION_LOG
<< "Failed to encode commands while ending the current render pass.";
return false;
}

if (!command_buffer_->SubmitCommands()) {
VALIDATION_LOG
<< "Failed to submit command buffer while ending render pass.";
if (!command_buffer_->SubmitCommandsAsync(std::move(pass_))) {
VALIDATION_LOG << "Failed to encode and submit command buffer while ending "
"render pass.";
return false;
}

2 changes: 2 additions & 0 deletions impeller/playground/backend/metal/playground_impl_mtl.h
@@ -6,6 +6,7 @@

#include <memory>

#include "flutter/fml/concurrent_message_loop.h"
#include "flutter/fml/macros.h"
#include "impeller/playground/playground_impl.h"

@@ -27,6 +28,7 @@ class PlaygroundImplMTL final : public PlaygroundImpl {
// To ensure that ObjC stuff doesn't leak into C++ TUs.
std::unique_ptr<Data> data_;
std::shared_ptr<Context> context_;
std::shared_ptr<fml::ConcurrentMessageLoop> concurrent_loop_;

// |PlaygroundImpl|
std::shared_ptr<Context> GetContext() const override;
6 changes: 4 additions & 2 deletions impeller/playground/backend/metal/playground_impl_mtl.mm
@@ -63,16 +63,18 @@
PlaygroundImplMTL::PlaygroundImplMTL(PlaygroundSwitches switches)
: PlaygroundImpl(switches),
handle_(nullptr, &DestroyWindowHandle),
data_(std::make_unique<Data>()) {
data_(std::make_unique<Data>()),
concurrent_loop_(fml::ConcurrentMessageLoop::Create()) {
Member Author:

This is more or less duplicating the problem that the VKContext has wherein we should only be creating a single concurrent message loop once per engine.

Member:

FWIW, we switched around ContextVK so that it supports sharing a concurrent message loop. There's no known problem. This came from Chinmay's design goal of wanting to share these. I'm not sure what that means for your PR; I'm just clarifying that point.

Member Author:

I'll look at ContextVK then; when I started this PR, ContextVK was still creating its own message loop.

Member Author:

ContextVK is still creating its own concurrent message loop?

workers_(fml::ConcurrentMessageLoop::Create()) {

Member:

ContextVK takes in a shared_ptr to a task queue which can be shared across multiple engines, but currently is not:

std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner;

I had a PR out that made ContextVK own a single concurrent message loop, but we decided we didn't want to shut the door on sharing concurrent message loops between engines. I haven't read this PR through; I'm just responding to the comment "the problem that the VKContext has wherein we should only be creating a single concurrent message loop once per engine". There is no problem that I know of with Vulkan, and we made sure of that with our recent work. Let me know if you want me to review this PR if you think it would be helpful.

::glfwDefaultWindowHints();
::glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
::glfwWindowHint(GLFW_VISIBLE, GLFW_FALSE);
auto window = ::glfwCreateWindow(1, 1, "Test", nullptr, nullptr);
if (!window) {
return;
}
auto worker_task_runner = concurrent_loop_->GetTaskRunner();
auto context = ContextMTL::Create(ShaderLibraryMappingsForPlayground(),
"Playground Library");
worker_task_runner, "Playground Library");
if (!context) {
return;
}
3 changes: 3 additions & 0 deletions impeller/renderer/backend/metal/command_buffer_mtl.h
@@ -36,6 +36,9 @@ class CommandBufferMTL final : public CommandBuffer {
// |CommandBuffer|
void OnWaitUntilScheduled() override;

// |CommandBuffer|
bool SubmitCommandsAsync(std::shared_ptr<RenderPass> render_pass) override;

// |CommandBuffer|
std::shared_ptr<RenderPass> OnCreateRenderPass(RenderTarget target) override;

57 changes: 57 additions & 0 deletions impeller/renderer/backend/metal/command_buffer_mtl.mm
@@ -4,8 +4,13 @@

#include "impeller/renderer/backend/metal/command_buffer_mtl.h"

#include "flutter/fml/make_copyable.h"
#include "flutter/fml/synchronization/semaphore.h"
#include "flutter/fml/trace_event.h"

#include "impeller/renderer/backend/metal/blit_pass_mtl.h"
#include "impeller/renderer/backend/metal/compute_pass_mtl.h"
#include "impeller/renderer/backend/metal/context_mtl.h"
#include "impeller/renderer/backend/metal/render_pass_mtl.h"

namespace impeller {
@@ -171,6 +176,58 @@ static bool LogMTLCommandBufferErrorIfPresent(id<MTLCommandBuffer> buffer) {
return true;
}

bool CommandBufferMTL::SubmitCommandsAsync(
std::shared_ptr<RenderPass> render_pass) {
Member Author:

Currently this only works for the render pass but could optionally take the blit and compute passes too. I moved this functionality to the command buffer to make ownership of the render pass data more obvious - the closure keeps the shared_ptr for the render pass alive while the command buffer itself only requires the buffer.
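A sketch of what those additional entry points could look like (hypothetical overloads; neither exists in this PR):

// Hypothetical; these would mirror SubmitCommandsAsync(std::shared_ptr<RenderPass>).
[[nodiscard]] virtual bool SubmitCommandsAsync(std::shared_ptr<BlitPass> blit_pass);
[[nodiscard]] virtual bool SubmitCommandsAsync(std::shared_ptr<ComputePass> compute_pass);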

TRACE_EVENT0("impeller", "CommandBufferMTL::SubmitCommandsAsync");
if (!IsValid() || !render_pass->IsValid()) {
return false;
}
auto context = context_.lock();
Contributor:

I think this is more or less what ended up being the fix for problems around ownership in Vulkan backend.

if (!context) {
return false;
}
[buffer_ enqueue];
Member Author:

Enqueuing on the main thread ensures the order.
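A minimal sketch of the ordering guarantee being relied on here (buffer names are illustrative; Metal documents that command buffers execute in enqueue order, not commit order):

// Raster thread: reserve queue slots in submission order.
[buffer_frame_1 enqueue];
[buffer_frame_2 enqueue];
// Worker threads: commits may land in any order...
[buffer_frame_2 commit];
[buffer_frame_1 commit];
// ...but the queue still executes buffer_frame_1 before buffer_frame_2.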

auto buffer = buffer_;
buffer_ = nil;

auto worker_task_runner = ContextMTL::Cast(*context).GetWorkerTaskRunner();

auto mtl_render_pass = static_cast<RenderPassMTL*>(render_pass.get());

// Render command encoder creation has been observed to exceed the stack size
// limit for worker threads, and therefore is intentionally constructed on the
// raster thread.
Contributor:

Can we not bump the stack size limit for worker threads?

Member Author:

How can I try that?

Contributor:

It's platform specific code, we'd need a pthreads and a win32 implementation at the least.
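For reference, a minimal pthreads sketch of bumping a worker's stack size (the 4 MiB figure and the thread body are illustrative assumptions, not existing fml code):

#include <pthread.h>

// Spawn a worker with an explicit stack size instead of the platform default.
bool SpawnWorkerWithLargeStack() {
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 4u * 1024u * 1024u);  // e.g. 4 MiB

  pthread_t worker;
  int result = pthread_create(
      &worker, &attr,
      [](void*) -> void* {
        // A real implementation would drain tasks from the concurrent
        // message loop here.
        return nullptr;
      },
      nullptr);
  pthread_attr_destroy(&attr);
  return result == 0;
}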

auto render_command_encoder =
Member Author:

This was a frustrating discovery because it prevents us from doing the obvious thing and just calling RenderPass::Encode in the closure.

[buffer renderCommandEncoderWithDescriptor:mtl_render_pass->desc_];
if (!render_command_encoder) {
return false;
}

auto task = fml::MakeCopyable(
[render_pass, buffer, render_command_encoder, weak_context = context_]() {
auto context = weak_context.lock();
if (!context) {
return;
Contributor:

I'm realizing this will not protect us from potentially encoding commands in the background.

We need the gpu sync switch in here so we can fizzle if GPU access is disabled. That's available via Shell::GetIsGpuDisabledSyncSwitch. I think it would make sense to expose that on ContextMTL.

Member Author:

Which operations are forbidden in the background? Is it just submitting the command buffer? I'm curious what stops us from doing that now on the single raster thread.

Member Author:

(that is, do we need to use the sync switch elsewhere too?)

Contributor:

Committing encoding work is forbidden: https://developer.apple.com/documentation/metal/gpu_devices_and_work_submission/preparing_your_metal_app_to_run_in_the_background?language=objc

Before this change, the work was all guarded by the rasterizer. Now that we're spawning new threads, it's harder to predict how scheduling will occur: the rasterizer may reach the end of the function it's calling while the task spawned to encode new work runs afterward.

Contributor:

Specifically, [buffer commit] below is definitely illegal in the background, but I think EncodeCommands may make other calls that are probably not allowed.

At any rate, inside this closure we can't trust that we're on the raster thread with the rasterizer having already checked whether GPU access is allowed.
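A sketch of the proposed guard, assuming the sync switch gets exposed on ContextMTL (GetIsGpuDisabledSyncSwitch is a hypothetical accessor here; Handlers/Execute is fml::SyncSwitch's existing interface):

// Inside the worker task, before committing:
ContextMTL::Cast(*context)
    .GetIsGpuDisabledSyncSwitch()  // hypothetical accessor, not in this revision
    ->Execute(fml::SyncSwitch::Handlers()
                  .SetIfTrue([&] {
                    // The app may be backgrounded; committing encoded work is
                    // illegal there, so fizzle instead.
                  })
                  .SetIfFalse([&] { [buffer commit]; }));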

}

auto mtl_render_pass = static_cast<RenderPassMTL*>(render_pass.get());
if (!mtl_render_pass->label_.empty()) {
[render_command_encoder setLabel:@(mtl_render_pass->label_.c_str())];
}

auto result = mtl_render_pass->EncodeCommands(
context->GetResourceAllocator(), render_command_encoder);
[render_command_encoder endEncoding];
if (result) {
[buffer commit];
} else {
VALIDATION_LOG << "Failed to encode command buffer";
}
});
worker_task_runner->PostTask(task);
return true;
}

void CommandBufferMTL::OnWaitUntilScheduled() {}

std::shared_ptr<RenderPass> CommandBufferMTL::OnCreateRenderPass(
12 changes: 10 additions & 2 deletions impeller/renderer/backend/metal/context_mtl.h
@@ -9,6 +9,7 @@
#include <string>
#include <vector>

#include "flutter/fml/concurrent_message_loop.h"
#include "flutter/fml/macros.h"
#include "impeller/base/backend_cast.h"
#include "impeller/core/sampler.h"
@@ -26,10 +27,12 @@ class ContextMTL final : public Context,
public std::enable_shared_from_this<ContextMTL> {
public:
static std::shared_ptr<ContextMTL> Create(
const std::vector<std::string>& shader_library_paths);
const std::vector<std::string>& shader_library_paths,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner);

static std::shared_ptr<ContextMTL> Create(
const std::vector<std::shared_ptr<fml::Mapping>>& shader_libraries_data,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner,
const std::string& label);

// |Context|
@@ -66,6 +69,8 @@

id<MTLCommandBuffer> CreateMTLCommandBuffer() const;

const std::shared_ptr<fml::ConcurrentTaskRunner>& GetWorkerTaskRunner() const;

private:
id<MTLDevice> device_ = nullptr;
id<MTLCommandQueue> command_queue_ = nullptr;
@@ -74,9 +79,12 @@
std::shared_ptr<SamplerLibrary> sampler_library_;
std::shared_ptr<AllocatorMTL> resource_allocator_;
std::shared_ptr<const Capabilities> device_capabilities_;
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner_;
bool is_valid_ = false;

ContextMTL(id<MTLDevice> device, NSArray<id<MTLLibrary>>* shader_libraries);
ContextMTL(id<MTLDevice> device,
NSArray<id<MTLLibrary>>* shader_libraries,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner);

std::shared_ptr<CommandBuffer> CreateCommandBufferInQueue(
id<MTLCommandQueue> queue) const;
23 changes: 17 additions & 6 deletions impeller/renderer/backend/metal/context_mtl.mm
@@ -65,9 +65,11 @@ static bool DeviceSupportsComputeSubgroups(id<MTLDevice> device) {
.Build();
}

ContextMTL::ContextMTL(id<MTLDevice> device,
NSArray<id<MTLLibrary>>* shader_libraries)
: device_(device) {
ContextMTL::ContextMTL(
id<MTLDevice> device,
NSArray<id<MTLLibrary>>* shader_libraries,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner)
: device_(device), worker_task_runner_(std::move(worker_task_runner)) {
// Validate device.
if (!device_) {
VALIDATION_LOG << "Could not setup valid Metal device.";
@@ -200,10 +202,12 @@ static bool DeviceSupportsComputeSubgroups(id<MTLDevice> device) {
}

std::shared_ptr<ContextMTL> ContextMTL::Create(
const std::vector<std::string>& shader_library_paths) {
const std::vector<std::string>& shader_library_paths,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner) {
auto device = CreateMetalDevice();
auto context = std::shared_ptr<ContextMTL>(new ContextMTL(
device, MTLShaderLibraryFromFilePaths(device, shader_library_paths)));
device, MTLShaderLibraryFromFilePaths(device, shader_library_paths),
std::move(worker_task_runner)));
if (!context->IsValid()) {
FML_LOG(ERROR) << "Could not create Metal context.";
return nullptr;
@@ -213,11 +217,13 @@ static bool DeviceSupportsComputeSubgroups(id<MTLDevice> device) {

std::shared_ptr<ContextMTL> ContextMTL::Create(
const std::vector<std::shared_ptr<fml::Mapping>>& shader_libraries_data,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner,
const std::string& label) {
auto device = CreateMetalDevice();
auto context = std::shared_ptr<ContextMTL>(new ContextMTL(
device,
MTLShaderLibraryFromFileData(device, shader_libraries_data, label)));
MTLShaderLibraryFromFileData(device, shader_libraries_data, label),
worker_task_runner));
if (!context->IsValid()) {
FML_LOG(ERROR) << "Could not create Metal context.";
return nullptr;
@@ -257,6 +263,11 @@ static bool DeviceSupportsComputeSubgroups(id<MTLDevice> device) {
return CreateCommandBufferInQueue(command_queue_);
}

const std::shared_ptr<fml::ConcurrentTaskRunner>&
ContextMTL::GetWorkerTaskRunner() const {
return worker_task_runner_;
}

std::shared_ptr<CommandBuffer> ContextMTL::CreateCommandBufferInQueue(
id<MTLCommandQueue> queue) const {
if (!IsValid()) {
2 changes: 2 additions & 0 deletions impeller/renderer/backend/metal/render_pass_mtl.mm
@@ -6,11 +6,13 @@

#include "flutter/fml/closure.h"
#include "flutter/fml/logging.h"
#include "flutter/fml/make_copyable.h"
#include "flutter/fml/trace_event.h"
#include "impeller/base/backend_cast.h"
#include "impeller/core/formats.h"
#include "impeller/core/host_buffer.h"
#include "impeller/core/shader_types.h"
#include "impeller/renderer/backend/metal/context_mtl.h"
#include "impeller/renderer/backend/metal/device_buffer_mtl.h"
#include "impeller/renderer/backend/metal/formats_mtl.h"
#include "impeller/renderer/backend/metal/pipeline_mtl.h"
15 changes: 15 additions & 0 deletions impeller/renderer/command_buffer.cc
@@ -36,6 +36,21 @@ void CommandBuffer::WaitUntilScheduled() {
return OnWaitUntilScheduled();
}

bool CommandBuffer::SubmitCommandsAsync(
std::shared_ptr<RenderPass>
render_pass // NOLINT(performance-unnecessary-value-param)
) {
TRACE_EVENT0("impeller", "CommandBuffer::SubmitCommandsAsync");
if (!render_pass->IsValid() || !IsValid()) {
return false;
}
if (!render_pass->EncodeCommands()) {
return false;
}

return SubmitCommands(nullptr);
}

std::shared_ptr<RenderPass> CommandBuffer::CreateRenderPass(
const RenderTarget& render_target) {
auto pass = OnCreateRenderPass(render_target);
14 changes: 13 additions & 1 deletion impeller/renderer/command_buffer.h
@@ -55,7 +55,8 @@ class CommandBuffer {

//----------------------------------------------------------------------------
/// @brief Schedule the command encoded by render passes within this
/// command buffer on the GPU.
/// command buffer on the GPU. The encoding of these commands is
/// performed immediately on the calling thread.
///
/// A command buffer may only be committed once.
///
Expand All @@ -65,6 +66,17 @@ class CommandBuffer {

[[nodiscard]] bool SubmitCommands();

//----------------------------------------------------------------------------
/// @brief Schedule the commands encoded by render passes within this
/// command buffer on the GPU. The enqueuing of this buffer is
/// performed immediately but encoding is pushed to a worker
/// thread if possible.
///
/// A command buffer may only be committed once.
///
[[nodiscard]] virtual bool SubmitCommandsAsync(
std::shared_ptr<RenderPass> render_pass);

//----------------------------------------------------------------------------
/// @brief Force execution of pending GPU commands.
///
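A minimal caller-side sketch of the new API, mirroring the call sites updated earlier in this PR (render target setup elided):

auto command_buffer = context->CreateCommandBuffer();
auto render_pass = command_buffer->CreateRenderPass(render_target);
// ... record draw commands into render_pass ...
if (!command_buffer->SubmitCommandsAsync(std::move(render_pass))) {
  VALIDATION_LOG << "Failed to encode and submit command buffer.";
}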
1 change: 1 addition & 0 deletions impeller/renderer/render_pass.h
@@ -7,6 +7,7 @@
#include <string>

#include "impeller/renderer/command.h"
#include "impeller/renderer/command_buffer.h"
#include "impeller/renderer/render_target.h"

namespace impeller {
@@ -9,6 +9,7 @@
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>

#include "flutter/fml/concurrent_message_loop.h"
#include "flutter/fml/platform/darwin/cf_utils.h"
#import "flutter/shell/platform/darwin/common/framework/Headers/FlutterTexture.h"
#import "flutter/shell/platform/darwin/graphics/FlutterDarwinExternalTextureMetal.h"
@@ -24,7 +25,7 @@ NS_ASSUME_NONNULL_BEGIN
/**
* Initializes a FlutterDarwinContextMetalImpeller.
*/
- (instancetype)init;
- (instancetype)initWithTaskRunner:(std::shared_ptr<fml::ConcurrentTaskRunner>)task_runner;

/**
* Creates an external texture with the specified ID and contents.
@@ -34,7 +35,7 @@
texture:(NSObject<FlutterTexture>*)texture;

/**
* Impeller context;
* Impeller context.
*/
@property(nonatomic, readonly) std::shared_ptr<impeller::ContextMTL> context;

@@ -16,7 +16,8 @@

FLUTTER_ASSERT_ARC

static std::shared_ptr<impeller::ContextMTL> CreateImpellerContext() {
static std::shared_ptr<impeller::ContextMTL> CreateImpellerContext(
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner) {
std::vector<std::shared_ptr<fml::Mapping>> shader_mappings = {
std::make_shared<fml::NonOwnedMapping>(impeller_entity_shaders_data,
impeller_entity_shaders_length),
@@ -27,7 +28,8 @@
std::make_shared<fml::NonOwnedMapping>(impeller_framebuffer_blend_shaders_data,
impeller_framebuffer_blend_shaders_length),
};
auto context = impeller::ContextMTL::Create(shader_mappings, "Impeller Library");
auto context = impeller::ContextMTL::Create(shader_mappings, std::move(worker_task_runner),
"Impeller Library");
if (!context) {
FML_LOG(ERROR) << "Could not create Metal Impeller Context.";
return nullptr;
@@ -39,10 +41,10 @@

@implementation FlutterDarwinContextMetalImpeller

- (instancetype)init {
- (instancetype)initWithTaskRunner:(std::shared_ptr<fml::ConcurrentTaskRunner>)task_runner {
self = [super init];
if (self != nil) {
_context = CreateImpellerContext();
_context = CreateImpellerContext(std::move(task_runner));
id<MTLDevice> device = _context->GetMTLDevice();
if (!device) {
FML_DLOG(ERROR) << "Could not acquire Metal device.";
3 changes: 2 additions & 1 deletion shell/platform/darwin/ios/framework/Source/FlutterEngine.mm
@@ -836,7 +836,8 @@ - (BOOL)createShell:(NSString*)entrypoint
[self](flutter::Shell& shell) {
[self recreatePlatformViewController];
return std::make_unique<flutter::PlatformViewIOS>(
shell, self->_renderingApi, self->_platformViewsController, shell.GetTaskRunners());
shell, self->_renderingApi, self->_platformViewsController, shell.GetTaskRunners(),
shell.GetConcurrentWorkerTaskRunner());
};

flutter::Shell::CreateCallback<flutter::Rasterizer> on_create_rasterizer =