Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/windows_webgpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ jobs:
${{ matrix.vcpkg_option == 'vcpkg' && '--use_vcpkg' || '' }} `
--cmake_extra_defines `
onnxruntime_BUILD_UNIT_TESTS=ON `
onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY=ON
onnxruntime_BUILD_DAWN_SHARED_LIBRARY=ON
if ($lastExitCode -ne 0) {
exit $lastExitCode
}
Expand Down
10 changes: 5 additions & 5 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C+
# Selects the code generator used for WGSL templates; the help text documents "static" as the default.
# NOTE(review): option() declares a boolean cache entry, so a non-boolean default such as "static" is
# presumably coerced to OFF unless the value is passed explicitly with -D. Confirm, and consider
# set(onnxruntime_WGSL_TEMPLATE "static" CACHE STRING ...) if a real string default is intended.
option(onnxruntime_WGSL_TEMPLATE "Specify the code generator for WGSL template. Default is static." "static")
option(onnxruntime_USE_EXTERNAL_DAWN "Build with treating Dawn as external dependency. Will not link Dawn at build time." OFF)
option(onnxruntime_CUSTOM_DAWN_SRC_PATH "Path to custom Dawn src dir.")
option(onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY "Build Dawn as a monolithic library" OFF)
option(onnxruntime_BUILD_DAWN_SHARED_LIBRARY "Build Dawn as a shared library" OFF)
option(onnxruntime_ENABLE_PIX_FOR_WEBGPU_EP "Adding frame present for PIX to capture a frame" OFF)
# The following 2 options are only for Windows
option(onnxruntime_ENABLE_DAWN_BACKEND_VULKAN "Enable Vulkan backend for Dawn (on Windows)" OFF)
Expand Down Expand Up @@ -899,8 +899,8 @@ if (onnxruntime_USE_WEBGPU)
#
# if (onnxruntime_USE_VCPKG AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (FALSE)
if (NOT onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
message(FATAL_ERROR "onnxruntime_USE_VCPKG is not supported with onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY=OFF")
if (onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
message(FATAL_ERROR "onnxruntime_USE_VCPKG is not supported with onnxruntime_BUILD_DAWN_SHARED_LIBRARY=ON")
endif()
if (onnxruntime_USE_EXTERNAL_DAWN)
message(FATAL_ERROR "onnxruntime_USE_VCPKG is not supported with onnxruntime_USE_EXTERNAL_DAWN=ON")
Expand All @@ -921,8 +921,8 @@ if (onnxruntime_USE_WEBGPU)
endif()
endif()

if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
list(APPEND ORT_PROVIDER_FLAGS -DBUILD_DAWN_MONOLITHIC_LIBRARY=1)
if (onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
list(APPEND ORT_PROVIDER_FLAGS -DBUILD_DAWN_SHARED_LIBRARY=1)
endif()
if (onnxruntime_USE_EXTERNAL_DAWN)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_EXTERNAL_DAWN=1)
Expand Down
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.9.2.zip;b7f8dc4a8
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0c12f53da76d0c31b03b9f0f8ec8f3b4.zip;239063aee4946a9af147b473a4c3da78ba7413b4
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.12.0.zip;7e733cfdc410d777b76122d64232499205589a96
dawn;https://github.com/google/dawn/archive/9733be39e18186961d503e064874afe3e9ceb8d1.zip;2a4017c32892b90d072a9102eba90ae691fae36d
dawn;https://github.com/google/dawn/archive/794b6fadc4171f7b853a77ffdf0948fbec431f41.zip;77bb02deace0d140411f02a2fb8f5f925ea6a1b6
kleidiai;https://github.com/ARM-software/kleidiai/archive/refs/tags/v1.9.0.tar.gz;a2765979f64efb173a4b8ba4de39dcba9c655786
duktape;https://github.com/svaarala/duktape/releases/download/v2.7.0/duktape-2.7.0.tar.xz;8200c8e417dbab7adcc12c4dbdef7651cfc55794
3 changes: 0 additions & 3 deletions cmake/external/abseil-cpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,6 @@ onnxruntime_fetchcontent_makeavailable(abseil_cpp)
FetchContent_GetProperties(abseil_cpp)
if(abseil_cpp_SOURCE_DIR)
set(ABSEIL_SOURCE_DIR ${abseil_cpp_SOURCE_DIR})
if(onnxruntime_USE_WEBGPU)
set(DAWN_ABSEIL_DIR ${abseil_cpp_SOURCE_DIR})
endif()
endif()

# abseil_cpp_SOURCE_DIR is non-empty if we build it from source
Expand Down
23 changes: 14 additions & 9 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,6 @@ onnxruntime_fetchcontent_makeavailable(Protobuf)
if(Protobuf_FOUND)
message(STATUS "Using protobuf from find_package(or vcpkg). Protobuf version: ${Protobuf_VERSION}")
else()
if(protobuf_SOURCE_DIR)
if(onnxruntime_USE_WEBGPU)
set(DAWN_PROTOBUF_DIR ${protobuf_SOURCE_DIR})
endif()
endif()
# Adjust warning flags
if (TARGET libprotoc)
if (NOT MSVC)
Expand Down Expand Up @@ -645,19 +640,28 @@ if (onnxruntime_USE_WEBGPU)
set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE)
set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE)
set(DAWN_BUILD_PROTOBUF OFF CACHE BOOL "" FORCE)
set(DAWN_BUILD_TESTS OFF CACHE BOOL "" FORCE)
if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE BOOL "" FORCE)
if (onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
# Request Dawn's monolithic library in its SHARED flavour. SHARED is not a boolean value, so the
# cache entry is typed STRING (not BOOL) to keep the stored type consistent with the value.
set(DAWN_BUILD_MONOLITHIC_LIBRARY SHARED CACHE STRING "" FORCE)
set(DAWN_ENABLE_INSTALL ON CACHE BOOL "" FORCE)

if (onnxruntime_USE_EXTERNAL_DAWN)
message(FATAL_ERROR "onnxruntime_USE_EXTERNAL_DAWN and onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY cannot be enabled at the same time.")
message(FATAL_ERROR "onnxruntime_USE_EXTERNAL_DAWN and onnxruntime_BUILD_DAWN_SHARED_LIBRARY cannot be enabled at the same time.")
endif()
else()
# use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size
set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)

# Static linking: point Dawn at the abseil/protobuf source trees that ORT already fetched,
# so both are built against the same copies. Each variable is only set when the
# corresponding source directory is known.
if (abseil_cpp_SOURCE_DIR)
  set(DAWN_ABSEIL_DIR "${abseil_cpp_SOURCE_DIR}")
endif()
if (protobuf_SOURCE_DIR)
  set(DAWN_PROTOBUF_DIR "${protobuf_SOURCE_DIR}")
endif()
endif()

if (onnxruntime_ENABLE_PIX_FOR_WEBGPU_EP)
Expand Down Expand Up @@ -714,6 +718,7 @@ if (onnxruntime_USE_WEBGPU)
set(DAWN_ENABLE_D3D11 OFF CACHE BOOL "" FORCE)
endif()
endif()

if (onnxruntime_CUSTOM_DAWN_SRC_PATH)
# use the custom dawn source path if provided
#
Expand Down Expand Up @@ -766,7 +771,7 @@ if (onnxruntime_USE_WEBGPU)
endif()

if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
if (onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::webgpu_dawn)
else()
if (NOT onnxruntime_USE_EXTERNAL_DAWN)
Expand Down
4 changes: 2 additions & 2 deletions cmake/onnxruntime_java.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ if (WIN32)
)
endif()
endif()
if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
if (onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:dawn::webgpu_dawn> ${JAVA_PACKAGE_LIB_DIR}/$<TARGET_FILE_NAME:dawn::webgpu_dawn>)
endif()
endif()
Expand All @@ -223,7 +223,7 @@ else()
if (onnxruntime_USE_QNN AND NOT onnxruntime_BUILD_QNN_EP_STATIC_LIB)
add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:onnxruntime_providers_qnn> ${JAVA_PACKAGE_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:onnxruntime_providers_qnn>)
endif()
if (onnxruntime_USE_WEBGPU AND onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
if (onnxruntime_USE_WEBGPU AND onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:dawn::webgpu_dawn> ${JAVA_PACKAGE_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:dawn::webgpu_dawn>)
endif()
endif()
Expand Down
2 changes: 1 addition & 1 deletion cmake/onnxruntime_nodejs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ if (onnxruntime_USE_WEBGPU)
list(APPEND NODEJS_DLL_DEPS "$<TARGET_FILE_DIR:dxcompiler>/dxcompiler.dll")
endif()
endif()
if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
if (onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
list(APPEND NODEJS_DLL_DEPS "$<TARGET_FILE:dawn::webgpu_dawn>")
endif()
endif()
Expand Down
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_webgpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@

set(onnxruntime_providers_webgpu_dll_deps)

if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
if (onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
target_link_libraries(onnxruntime_providers_webgpu dawn::webgpu_dawn)

if (WIN32)
Expand Down
2 changes: 1 addition & 1 deletion cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1100,7 +1100,7 @@ if (onnxruntime_USE_WEBGPU)
)
endif()
endif()
if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
if (onnxruntime_BUILD_DAWN_SHARED_LIBRARY)
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
Expand Down
4 changes: 2 additions & 2 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
onnx_test_runner_common onnxruntime_test_utils onnxruntime_common
onnxruntime onnxruntime_flatbuffers onnx_test_data_proto
${onnxruntime_EXTERNAL_LIBRARIES}
${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS})
absl::flags absl::flags_parse ${SYS_PATH_LIB} ${CMAKE_DL_LIBS})
if(NOT WIN32)
if(onnxruntime_USE_SNPE)
list(APPEND onnxruntime_perf_test_libs onnxruntime_providers_snpe)
Expand All @@ -1278,7 +1278,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
target_link_libraries(onnxruntime_perf_test PRIVATE debug dbghelp advapi32)
endif()
else()
target_link_libraries(onnxruntime_perf_test PRIVATE onnx_test_runner_common ${GETOPT_LIB_WIDE} ${onnx_test_libs})
target_link_libraries(onnxruntime_perf_test PRIVATE onnx_test_runner_common absl::flags absl::flags_parse ${onnx_test_libs})
endif()
set_target_properties(onnxruntime_perf_test PROPERTIES FOLDER "ONNXRuntimeTest")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ platforms:
- platformName: android
deviceName: Samsung Galaxy S22 Ultra
platformVersion: 12.0
browserstackLocal: true
browserstackLocal: false
buildName: ORT android test
buildIdentifier: ${BUILD_NUMBER}
projectName: ORT-UITests
debug: true
networkLogs: false
testContextOptions:
skipSessionStatus: true
skipSessionStatus: true
7 changes: 6 additions & 1 deletion include/onnxruntime/core/framework/execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,12 @@ class IExecutionProvider {
/**
Get the device id of current execution provider
*/
virtual int GetDeviceId() const { return default_device_.Id(); };
virtual int GetDeviceId() const { return default_device_.Id(); }

/**
* Get the OrtDevice the execution provider was registered with.
*/
const OrtDevice& GetDevice() const { return default_device_; }

/**
Get execution provider's configuration options.
Expand Down
7 changes: 7 additions & 0 deletions include/onnxruntime/core/framework/ortdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,13 @@ struct OrtDevice {
return alignment < other.alignment;
}

// Returns true when this device and `other` agree on device type, memory type,
// vendor id and device id. The `alignment` member is deliberately not compared.
bool EqualIgnoringAlignment(const OrtDevice& other) const {
  const bool same_kind = device_type == other.device_type &&
                         memory_type == other.memory_type;
  const bool same_instance = vendor_id == other.vendor_id &&
                             device_id == other.device_id;
  return same_kind && same_instance;
}

private:
// Device type.
int32_t device_type : 8;
Expand Down
11 changes: 10 additions & 1 deletion include/onnxruntime/core/session/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ class Environment {
return shared_allocators_;
}

/**
* Returns an AllocatorPtr for a shared IAllocator based allocator if it matches the memory info.
* The OrtMemoryInfo name and whether it's an arena or device allocator is ignored in the lookup, as is the
* alignment.
* The user calling this function is not expected to know the alignment, and we expect the allocator instance to be
* created with a valid alignment for the device.
*/
AllocatorPtr GetRegisteredSharedAllocator(const OrtMemoryInfo& mem_info) const;

/**
* Removes registered allocator that was previously registered for sharing between multiple sessions.
*/
Expand Down Expand Up @@ -171,7 +180,7 @@ class Environment {
std::unique_ptr<onnxruntime::concurrency::ThreadPool> inter_op_thread_pool_;
bool create_global_thread_pools_{false};

std::mutex mutex_;
mutable std::mutex mutex_;

// shared allocators from various sources.
// CreateAndRegisterAllocator[V2]: IAllocator allocators created by ORT
Expand Down
6 changes: 3 additions & 3 deletions onnxruntime/core/dll/delay_load_hook.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
// - https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-search-order#alternate-search-order-for-unpackaged-apps
//
// The DLL DelayLoad hook is only enabled when the compiler is MSVC and at least one of the following is True:
// - both USE_WEBGPU and BUILD_DAWN_MONOLITHIC_LIBRARY are defined
// - both USE_WEBGPU and BUILD_DAWN_SHARED_LIBRARY are defined
// - USE_DML is defined
//
#if defined(USE_WEBGPU) && defined(BUILD_DAWN_MONOLITHIC_LIBRARY)
#if defined(USE_WEBGPU) && defined(BUILD_DAWN_SHARED_LIBRARY)
#define ORT_DELAY_LOAD_WEBGPU_DAWN_DLL 1
#else
#define ORT_DELAY_LOAD_WEBGPU_DAWN_DLL 0
Expand All @@ -45,7 +45,7 @@

namespace {

#define DEFINE_KNOWN_DLL(name) {#name ".dll", L#name L".dll"}
#define DEFINE_KNOWN_DLL(name) {#name ".dll", L## #name L".dll"}

constexpr struct {
const char* str;
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/cpu/ml/tree_ensemble_attribute.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,8 +389,10 @@ struct TreeEnsembleAttributesV5 {
int64_t curr_treeid = 0;
for (const int64_t& tree_root : tree_roots) {
size_t tree_root_size_t = onnxruntime::narrow<size_t>(tree_root);
bool is_leaf = (nodes_falsenodeids[tree_root_size_t] == nodes_truenodeids[tree_root_size_t] &&
nodes_falseleafs[tree_root_size_t] && nodes_trueleafs[tree_root_size_t]);
transformInputOneTree(tree_root_size_t, curr_treeid, 0,
nodes_falsenodeids[tree_root_size_t] == nodes_truenodeids[tree_root_size_t],
is_leaf,
membership_values_by_id, output);
curr_treeid++;
}
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,10 @@
}
*/

// guard against bad device discovery. max devices we expect to add is num_cuda_devices. if we're attempting
// to add more than that we have duplicates in the `devices` array.
max_ep_devices = std::min(max_ep_devices, static_cast<size_t>(num_cuda_devices));

Check notice on line 739 in onnxruntime/core/providers/cuda/cuda_provider_factory.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/cuda/cuda_provider_factory.cc#L739

Add #include <algorithm> for min [build/include_what_you_use] [4]
Raw output
onnxruntime/core/providers/cuda/cuda_provider_factory.cc:739:  Add #include <algorithm> for min  [build/include_what_you_use] [4]

int16_t device_id = 0;
for (size_t i = 0; i < num_devices && num_ep_devices < max_ep_devices; ++i) {
const OrtHardwareDevice& device = *devices[i];
Expand Down
Loading
Loading