Skip to content

Commit

Permalink
WebNN: Implement CommandRecorder::ExecuteOperator() for DML backend
Browse files Browse the repository at this point in the history
This CL implements `CommandRecorder::ExecuteOperator()`, which executes a
compiled DirectML operator on the GPU with input and output resource
bindings. The caller should call `InitializeOperator()` for this
operator before executing it. If the operator execution requires any
persistent resources, they should also be initialized beforehand and
supplied when calling this method.

This CL extends the operator initialization unit tests so that they can
also test execution of Relu and Convolution operators and verify the
computation result.

This CL adds another test case, ExecuteReluOperatorForMultipleBindings,
which ensures that the operator execution can be dispatched multiple times
with different bindings before waiting for the GPU work to complete.
This test case emulates the scenario in which JavaScript code calls the
MLGraph compute method without waiting for the promise of the previous
compute call to be resolved.

Bug: 1273291
Change-Id: Id57e63d5e9eff888d6929b905c661f0594f39909
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4607584
Reviewed-by: Rafael Cintron <rafael.cintron@microsoft.com>
Commit-Queue: ningxin hu <ningxin.hu@intel.com>
Cr-Commit-Position: refs/heads/main@{#1159241}
  • Loading branch information
huningxin authored and Chromium LUCI CQ committed Jun 17, 2023
1 parent 5016469 commit 83c6f55
Show file tree
Hide file tree
Showing 3 changed files with 448 additions and 41 deletions.
89 changes: 81 additions & 8 deletions services/webnn/dml/command_recorder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,88 @@ HRESULT CommandRecorder::InitializeOperator(
return S_OK;
}

// NOTE(review): the next four lines look like the previous `ExecuteGraph`
// signature left in by the diff view (no +/- markers are present); the
// definition that follows uses the `ExecuteOperator` signature. Confirm
// against the actual file.
HRESULT CommandRecorder::ExecuteGraph(
GraphDMLImpl* graph,
const std::vector<DML_BINDING_DESC>& input_bindings,
const std::vector<DML_BINDING_DESC>& output_bindings) {
// Records the execution of `compiled_operator` into `command_list_`.
//
// Steps, in order:
//   1. Query the operator's binding properties.
//   2. Create a shader-visible CBV/SRV/UAV descriptor heap sized by
//      `RequiredDescriptorCount` and set it on the command list.
//   3. Create a binding table over that heap.
//   4. If `TemporaryResourceSize` is non-zero, create a default buffer and
//      bind it as the temporary resource.
//   5. If `PersistentResourceSize` is non-zero, bind the caller-supplied
//      `persistent_resource_binding` (CHECKed to be present and of buffer
//      type).
//   6. Bind `input_bindings` and `output_bindings`, then record the dispatch.
//
// The recorder must be open (`is_open_`) and `compiled_operator` must be
// non-null; both are enforced with CHECK.
//
// Returns S_OK once the dispatch has been recorded. Creation of the descriptor
// heap, the binding table and the temporary buffer is wrapped in
// RETURN_IF_FAILED (presumably returning the failing HRESULT to the caller —
// the macro is defined outside this view).
HRESULT CommandRecorder::ExecuteOperator(
IDMLCompiledOperator* compiled_operator,
base::span<const DML_BINDING_DESC> input_bindings,
base::span<const DML_BINDING_DESC> output_bindings,
const absl::optional<DML_BINDING_DESC>& persistent_resource_binding) {
CHECK(is_open_);
// NOTE(review): the next four lines reference `graph`, which is not a
// parameter of `ExecuteOperator`; they appear to be removed lines from the
// diff view rather than part of the new body — confirm.
CHECK(graph);
// TODO(crbug.com/1273291): This method will be implemented after the
// GraphDMLImpl class has been defined.
NOTIMPLEMENTED();
CHECK(compiled_operator);

// The binding properties report how many descriptors and how much temporary
// and persistent memory the operator execution needs.
DML_BINDING_PROPERTIES execution_binding_properties =
compiled_operator->GetBindingProperties();

// TODO(crbug.com/1455278): Consider maintaining a descriptors pool for better
// resource reuse.
ComPtr<ID3D12DescriptorHeap> descriptor_heap;
// The count sizes the descriptor heap below; the CHECK enforces that it is
// non-zero.
CHECK_GT(execution_binding_properties.RequiredDescriptorCount, 0u);
D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc{
.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
.NumDescriptors = execution_binding_properties.RequiredDescriptorCount,
.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE};
RETURN_IF_FAILED(adapter_->d3d12_device()->CreateDescriptorHeap(
&descriptor_heap_desc, IID_PPV_ARGS(&descriptor_heap)));

// Make the freshly created heap the one used by the command list for the
// dispatch recorded below.
ID3D12DescriptorHeap* descriptor_heaps[] = {descriptor_heap.Get()};
command_list_->SetDescriptorHeaps(/* NumDescriptorHeaps */ 1,
descriptor_heaps);

// The binding table covers the whole heap, starting at the heap's first CPU
// and GPU descriptor handles.
DML_BINDING_TABLE_DESC binding_table_desc = {
.Dispatchable = compiled_operator,
.CPUDescriptorHandle =
descriptor_heap->GetCPUDescriptorHandleForHeapStart(),
.GPUDescriptorHandle =
descriptor_heap->GetGPUDescriptorHandleForHeapStart(),
.SizeInDescriptors =
execution_binding_properties.RequiredDescriptorCount};
// TODO(crbug.com/1455278): Consider reusing the binding table.
ComPtr<IDMLBindingTable> binding_table;
RETURN_IF_FAILED(adapter_->dml_device()->CreateBindingTable(
&binding_table_desc, IID_PPV_ARGS(&binding_table)));

// Create and bind the temporary resource if the operator execution requires.
auto temp_resource_size = execution_binding_properties.TemporaryResourceSize;
if (temp_resource_size > 0) {
ComPtr<ID3D12Resource> temp_resource;
RETURN_IF_FAILED(
adapter_->CreateDefaultBuffer(temp_resource_size, temp_resource));
// Bind the whole buffer (offset 0, full size) as the temporary resource.
DML_BUFFER_BINDING temp_buffer_binding{.Buffer = temp_resource.Get(),
.Offset = 0,
.SizeInBytes = temp_resource_size};
DML_BINDING_DESC temp_binding_desc{.Type = DML_BINDING_TYPE_BUFFER,
.Desc = &temp_buffer_binding};
binding_table->BindTemporaryResource(&temp_binding_desc);
// Hand ownership of the buffer to the command queue so that it outlives this
// scope (judging by the method name, until the queued GPU work completes).
adapter_->command_queue()->ReferenceUntilCompleted(
std::move(temp_resource));
}

// The persistent resource should be bound if the operator execution requires.
auto persistent_buffer_size =
execution_binding_properties.PersistentResourceSize;
if (persistent_buffer_size > 0) {
// The caller must supply a buffer-typed binding when the operator reports a
// non-zero persistent resource size.
CHECK_EQ(persistent_resource_binding.has_value(), true);
CHECK_EQ(persistent_resource_binding.value().Type, DML_BINDING_TYPE_BUFFER);
binding_table->BindPersistentResource(&persistent_resource_binding.value());
}

// Bind the input and output resources.
// `checked_cast` guards the size_t -> uint32_t narrowing of the span sizes.
binding_table->BindInputs(base::checked_cast<uint32_t>(input_bindings.size()),
input_bindings.data());
binding_table->BindOutputs(
base::checked_cast<uint32_t>(output_bindings.size()),
output_bindings.data());

// Dispatch the execution of the compiled operator.
command_recorder_->RecordDispatch(command_list_.Get(), compiled_operator,
binding_table.Get());

// It's safe to release the binding table right after the dispatch has been
// recorded into the command list. However, the heap which is referred to by
// the GPU descriptor handle should be kept alive until all work referencing
// it has completed execution on the GPU.
adapter_->command_queue()->ReferenceUntilCompleted(
std::move(descriptor_heap));

return S_OK;
}

Expand Down
31 changes: 27 additions & 4 deletions services/webnn/dml/command_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ namespace webnn::dml {
using Microsoft::WRL::ComPtr;

class Adapter;
class GraphDMLImpl;

// CommandRecorder is mainly responsible for the initialization and execution of
// a DirectML graph. It wraps a DirectML command recorder, and manages the
Expand Down Expand Up @@ -83,9 +82,33 @@ class CommandRecorder final {
const absl::optional<DML_BINDING_DESC>& input_array_binding,
const absl::optional<DML_BINDING_DESC>& persistent_resource_binding);

// NOTE(review): the `ExecuteGraph` declaration below looks like the removed
// side of the diff view; `ExecuteOperator` further down is its replacement.
// Confirm against the actual header.
HRESULT ExecuteGraph(GraphDMLImpl* graph,
const std::vector<DML_BINDING_DESC>& input_bindings,
const std::vector<DML_BINDING_DESC>& output_bindings);
// Execute a compiled DirectML operator after it has been initialized. The
// caller is allowed to call this method multiple times to record operator
// executions with different inputs. The caller should wait for the operator
// execution to complete on the GPU before reading back the results.
//
// The input and output resources are supplied by the caller via
// `input_bindings` and `output_bindings`. The input and output resources will
// be bound to the operator's binding table. The number of bindings should
// exactly match the number of input and output tensors of this operator. All
// bound resources need to be in the D3D12_RESOURCE_STATE_UNORDERED_ACCESS
// state before calling this method. It's the caller's responsibility to keep
// these resources alive until the operator execution work completes on the
// GPU.
//
// If the compiled operator also requires any persistent resources, they
// should be initialized by `InitializeOperator()` and be supplied via
// `persistent_resource_binding`. The lifetime of the persistent resource
// should be the same as that of the other input and output resources.
//
// This method will create the necessary temporary resources for the operator
// execution, and these temporary resources will be kept alive until the GPU
// work is done.
//
// Returns S_OK once the dispatch has been recorded; the implementation wraps
// resource creation in RETURN_IF_FAILED, so a failing HRESULT is presumably
// propagated to the caller.
HRESULT ExecuteOperator(
IDMLCompiledOperator* compiled_operator,
base::span<const DML_BINDING_DESC> input_bindings,
base::span<const DML_BINDING_DESC> output_bindings,
const absl::optional<DML_BINDING_DESC>& persistent_resource_binding);

private:
CommandRecorder(scoped_refptr<Adapter> adapter,
Expand Down

0 comments on commit 83c6f55

Please sign in to comment.