Merged
62 commits
4a3b63f
Weaken dxcore dependency (#24845)
skottmckay Jun 2, 2025
d5fa2ac
Improve Windows ETW callback registration and fix issues (#24877)
yuslepukhin Jun 2, 2025
915a999
[WebGPU] Unify core implementations of GEMM and MatMul (#24586)
xiaofeihan1 Jun 2, 2025
22c2581
[DNNL] update tests (#24921)
tianleiwu Jun 2, 2025
b34ae7c
Update README.md: remove the build pipeline status section (#24548)
snnn Jun 2, 2025
3ca8a49
[QNN-EP] Replace Upsample with Resize during Quantization (#24896)
chuteng-quic Jun 2, 2025
49ed0e8
[QNN-EP] Fuse ChannelShuffle pattern (#24904)
qti-yuduo Jun 2, 2025
03b22ff
[WebGPU EP] adjust test case to skip CoreML EP (#24927)
prathikr Jun 2, 2025
53ee6c5
[CUDA] FpA IntB Gemm Weight Conversion in GPU (#24914)
tianleiwu Jun 3, 2025
340b188
Fusing Initializers with Graph Transforms (#24726)
sunnyshu-intel Jun 3, 2025
24e0b07
Cast Nodes Fusion (#24842)
nenad1002 Jun 3, 2025
f250ffc
Fix a test failure in mlas (#24930)
snnn Jun 3, 2025
11bcce3
[webgpu] Bump version of Dawn to c3999d7e3 (#24935)
fs-eire Jun 3, 2025
665922d
Remove ep_weight_sharing_ctx_gen tool from QNN EP python wheel (#24895)
HectorSVC Jun 3, 2025
36fc8c8
[MIGraphX EP] Add migx ep fp8 support and int4 weights (#23534)
TedThemistokleous Jun 3, 2025
3426f64
Support activation broadcasting in XNNPACK Matmul (#24908)
hariharans29 Jun 3, 2025
8b3326e
Add support for bool type in SplitToSequence (#24929)
mauriciocm9 Jun 3, 2025
c7fdb54
Extend OrtAllocator API to get Allocator statistics (#24785)
toothache Jun 4, 2025
242cb43
Run Matmul 8 bit tests on CPU builds (#24946)
hariharans29 Jun 4, 2025
82016f3
Fix initialization of same_node_ in TreeEnsemble (#24654)
xadupre Jun 4, 2025
9fa70d6
Skip `_tpause` call for `_M_ARM64EC` in spin_pause.cc. (#24942)
edgchen1 Jun 4, 2025
f7ed05c
A temporary fix for layout opt level to unblock react native android …
tianleiwu Jun 4, 2025
660ff3b
Add kleidiai to onnxruntime_EXTERNAL_LIBRARIES in setup_kleidiai(). (…
edgchen1 Jun 4, 2025
30415e6
[webgpu] fix a build warning in latest GCC (#24944)
fs-eire Jun 4, 2025
8484199
Fix libonnxruntime4j_jni.so 16KB page size compatibility on Android A…
Copilot Jun 4, 2025
5624a2b
Bump ruff from 0.11.11 to 0.11.12 (#24923)
dependabot[bot] Jun 4, 2025
5fdd4e4
Add ONNX RMSNormalization(23) (#24875)
titaiwangms Jun 4, 2025
ab5ff6a
[CUDA] fp16 intB gemm scale only kernel (#24955)
tianleiwu Jun 5, 2025
80555f5
[MIGraphX EP]Add support for skipLayerNormalization (#123) (#24886)
TedThemistokleous Jun 5, 2025
cb4023a
[ROCM EP] - Updates to enable HIPBLAS API updates (#122) (#24885)
TedThemistokleous Jun 5, 2025
c6d062b
[ROCm EP]Add ROCm execution provider to excluded EP for test with Cud…
TedThemistokleous Jun 5, 2025
ed7c234
Update deprecated CUDA api (#24733)
yf711 Jun 5, 2025
d4076dc
fix WebAssembly single thread build (#24933)
fs-eire Jun 5, 2025
9415b94
[cpu] fix Sigmoid f32 implementation on non-amd64 platform (#24749)
fs-eire Jun 5, 2025
97d8d90
[QNN-EP] Add Support for CumSum in QNN EP (#24820)
Akupadhye Jun 5, 2025
a724459
[QNN EP] Add 16x16 Gemm translation (#24849)
quic-tirupath Jun 5, 2025
46caf47
[QNN EP] Fix 16x16 MatMul translation (#24846)
quic-tirupath Jun 5, 2025
813a6d3
Revert "[webgpu] Bump version of Dawn to c3999d7e3 (#24935)" (#24966)
fs-eire Jun 5, 2025
1c577b7
Allow FP16 math in flash attention (#24953)
sushraja-msft Jun 6, 2025
494d356
faster shut down (#24891)
snnn Jun 6, 2025
5cae8d2
[WebNN] Add validation for node input tensor ranks (#24951)
NingW101 Jun 6, 2025
ebcf2eb
[CoreML] Update Conv and Softmax ops (#24594)
carzh Jun 6, 2025
a1e6842
[web] handle script URL missing when in ESM (#24968)
fs-eire Jun 6, 2025
bacb3f5
[DML] Restore compatibility with Windows Sdk 10.0.17134.0 (#24950)
JulienMaille Jun 6, 2025
89258ae
[CUDA] upgrade cudnn front end to 1.12 (#24977)
tianleiwu Jun 8, 2025
51da994
[MIGraphX EP] Adding Ortvalue features support for MGX EP (#23404)
TedThemistokleous Jun 9, 2025
c9e5889
[CoreML] mark input params variable unused (#24976)
carzh Jun 9, 2025
21fbd6c
Use c++17 to simplify the AllocatorDefaultAllocAligned function (#24984)
snnn Jun 9, 2025
2a7f4ca
Fix c/c++ document generation (#24979)
baijumeswani Jun 9, 2025
a1217d5
Adding support for Turing Arch (#24882)
umangb-09 Jun 9, 2025
c5b48ae
[webgpu] Restore FP16 math in flash attention generation (#24994)
qjia7 Jun 9, 2025
64a719f
Remove `sequentially_access_by_threads` for Conv (#24938)
xiaofeihan1 Jun 9, 2025
8412cac
[webgpu] add cases to cover the subgroup matrix path (#24970)
xhcao Jun 9, 2025
d07e85b
[WebGPU EP] bug fix for convolution operator (#25000)
prathikr Jun 10, 2025
0e52117
Update googletest to v1.17.0 in deps.txt (#25009)
fs-eire Jun 10, 2025
06fff6e
Integration with ONNX 1.18.0 (#24945)
titaiwangms Jun 10, 2025
5284c33
Split DML nuget packaging jobs to a dedicated pipeline (#25014)
snnn Jun 10, 2025
f810e55
Update transformers package version to 4.48.0 (#24982)
vraspar Jun 10, 2025
5467921
TRT RTX EP changes (#25015)
ishwar-raut1 Jun 10, 2025
aa64037
Bump ruff from 0.11.12 to 0.11.13 (#24988)
dependabot[bot] Jun 10, 2025
f390eb5
[QNN-EP] Support non-last axis TopK. (#24881)
minfhong-quic Jun 11, 2025
81c5f59
Merge branch 'master' into ovep-develop
ankitm3k Jun 13, 2025
10 changes: 9 additions & 1 deletion .github/workflows/linux-wasm-ci-build-and-test-workflow.yml
@@ -15,6 +15,14 @@ on:
required: false
type: boolean
default: false
use_vcpkg:
required: false
type: boolean
default: true
enable_wasm_threads:
required: false
type: boolean
default: true
build_jsep:
required: false
type: boolean
@@ -29,7 +37,7 @@ jobs:
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Ubuntu2204-AMD-CPU"]
env:
buildArch: x64
common_build_args: --parallel --use_vcpkg --use_vcpkg_ms_internal_asset_cache --config ${{ inputs.build_config }} --skip_submodule_sync --build_wasm --enable_wasm_simd --enable_wasm_threads ${{ inputs.extra_build_args }}
common_build_args: --parallel ${{ inputs.use_vcpkg == true && '--use_vcpkg --use_vcpkg_ms_internal_asset_cache' || '' }} --config ${{ inputs.build_config }} --skip_submodule_sync --build_wasm --enable_wasm_simd ${{ inputs.enable_wasm_threads == true && '--enable_wasm_threads' || '' }} ${{ inputs.extra_build_args }}

steps:
- name: Checkout code
10 changes: 10 additions & 0 deletions .github/workflows/web.yml
@@ -52,6 +52,16 @@ jobs:
build_jsep: true
build_webgpu: true

wasm_Release_static_library:
needs: precheck
uses: ./.github/workflows/linux-wasm-ci-build-and-test-workflow.yml
with:
build_config: Release
extra_build_args: "--skip_tests --enable_wasm_api_exception_catching --disable_rtti --build_wasm_static_lib"
use_vcpkg: false
enable_wasm_threads: false
skip_publish: true

web_Debug:
needs:
- precheck
20 changes: 0 additions & 20 deletions README.md
@@ -20,26 +20,6 @@
- ONNX Runtime Inferencing: [microsoft/onnxruntime-inference-examples](https://github.com/microsoft/onnxruntime-inference-examples)
- ONNX Runtime Training: [microsoft/onnxruntime-training-examples](https://github.com/microsoft/onnxruntime-training-examples)

## Builtin Pipeline Status

|System|Inference|Training|
|---|---|---|
|Windows|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Windows%20CPU%20CI%20Pipeline?label=Windows+CPU)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=9)<br>[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Windows%20GPU%20CUDA%20CI%20Pipeline?label=Windows+GPU+CUDA)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=218)<br>[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Windows%20GPU%20TensorRT%20CI%20Pipeline?label=Windows+GPU+TensorRT)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=47)<br>[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Windows%20GPU%20WebGPU%20CI%20Pipeline?label=Windows+GPU+WebGPU)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=228)||
|Linux|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Linux%20CPU%20CI%20Pipeline?label=Linux+CPU)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=11)<br>[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Linux%20CPU%20Minimal%20Build%20E2E%20CI%20Pipeline?label=Linux+CPU+Minimal+Build)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=64)<br>[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Linux%20GPU%20CI%20Pipeline?label=Linux+GPU)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=12)<br>[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Linux%20GPU%20TensorRT%20CI%20Pipeline?label=Linux+GPU+TensorRT)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=45)<br>[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Linux%20OpenVINO%20CI%20Pipeline?label=Linux+OpenVINO)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=55)|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/orttraining-linux-ci-pipeline?label=Linux+CPU+Training)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=86)<br>[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/orttraining-linux-gpu-ci-pipeline?label=Linux+GPU+Training)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=84)|
|Mac|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/MacOS%20CI%20Pipeline?label=MacOS+CPU)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=13)||
|Android|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Android%20CI%20Pipeline?label=Android)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=53)||
|iOS|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/iOS%20CI%20Pipeline?label=iOS)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=134)||
|Web|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/ONNX%20Runtime%20Web%20CI%20Pipeline?label=Web)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=161)||
|Other|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/onnxruntime-binary-size-checks-ci-pipeline?repoName=microsoft%2Fonnxruntime&label=Binary+Size+Check)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=187&repoName=microsoft%2Fonnxruntime)||

This project is tested with [BrowserStack](https://www.browserstack.com/home).

## Third-party Pipeline Status

|System|Inference|Training|
|---|---|---|
|Linux|[![Build Status](https://github.com/Ascend/onnxruntime/actions/workflows/build-and-test.yaml/badge.svg)](https://github.com/Ascend/onnxruntime/actions/workflows/build-and-test.yaml)||

## Releases

The current release and past releases can be found here: https://github.com/microsoft/onnxruntime/releases.
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
@@ -372,6 +372,7 @@ if (onnxruntime_USE_ROCM)
if (HIPIFY_PERL_PATH-NOTFOUND)
MESSAGE(FATAL_ERROR "hipify-perl not found")
endif()
MESSAGE("HIPIFY PATH:"${HIPIFY_PERL_PATH}/hipify-perl)
set(onnxruntime_HIPIFY_PERL ${HIPIFY_PERL_PATH}/hipify-perl)
endif()

2 changes: 2 additions & 0 deletions cmake/adjust_global_compile_flags.cmake
@@ -4,6 +4,8 @@ if (ANDROID)
# Build shared libraries with support for 16 KB ELF alignment
# https://source.android.com/docs/core/architecture/16kb-page-size/16kb#build-lib-16kb-alignment
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,max-page-size=16384")
# Also apply to MODULE libraries (like libonnxruntime4j_jni.so)
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-z,max-page-size=16384")
endif()

# Enable space optimization for gcc/clang
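For context: CMake applies CMAKE_SHARED_LINKER_FLAGS only when linking SHARED libraries and CMAKE_MODULE_LINKER_FLAGS only when linking MODULE libraries, which is why the 16 KB max-page-size flag has to be appended to both variables before a JNI module such as libonnxruntime4j_jni.so is covered. A minimal sketch with hypothetical targets (not from this repository):

cmake_minimum_required(VERSION 3.16)
project(page_size_demo CXX)

# Hypothetical illustration, not part of this PR: each library type reads a
# different linker-flag variable, so the flag must be present in both.
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,max-page-size=16384")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-z,max-page-size=16384")

add_library(demo_shared SHARED demo.cc)  # linked with CMAKE_SHARED_LINKER_FLAGS
add_library(demo_module MODULE demo.cc)  # linked with CMAKE_MODULE_LINKER_FLAGS (e.g. a JNI library)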
9 changes: 3 additions & 6 deletions cmake/deps.txt
@@ -9,9 +9,6 @@
#since the file contains a version string: "lts_20230802". However, the file is for debugging purposes only and would
#not affect built binaries.
#
# NOTE: You must run deps_update_and_upload.py and generate_cgmanifest.py when ready to test your changes in a CI.
# See https://microsoft.sharepoint.com/teams/ONNX2/_layouts/OneNote.aspx?id=%2Fteams%2FONNX2%2FShared%20Documents%2FNotebooks%2FONNX%20Ecosystem%20Team%20Notebook&wd=target%28Development.one%7C63D3AB47-51D1-4A62-9965-66882234BD44%2FAdd%20or%20update%20a%20dependency%20in%20deps.txt%7C0E9ED71D-89D5-40FA-B05F-C0123289C591%2F%29
#
abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20240722.0.zip;36ee53eb1466fb6e593fc5c286680de31f8a494a
coremltools;https://github.com/apple/coremltools/archive/refs/tags/7.1.zip;f1bab0f30966f2e217d8e01207d518f230a1641a
cxxopts;https://github.com/jarro2783/cxxopts/archive/3c73d91c0b04e2b59462f0a741be8c07024c1bc0.zip;6c6ca7f8480b26c8d00476e0e24b7184717fe4f0
@@ -29,15 +26,15 @@ flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v23.5.26.zip
fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.8.5.zip;cd47d3d272faf353600c8cc2fdec2b52d6f69177
googletest;https://github.com/google/googletest/archive/refs/tags/v1.15.0.zip;9d2d0af8d77ac726ea55d44a8fa727ec98311349
googletest;https://github.com/google/googletest/archive/refs/tags/v1.17.0.zip;f638fa0e724760e2ba07ff8cfba32cd644e1ce28
#xnnpack 2024.09.04
googlexnnpack;https://github.com/google/XNNPACK/archive/fe98e0b93565382648129271381c14d6205255e3.zip;14f61dcf17cec2cde34ba2dcf61d6f24bf6059f3
json;https://github.com/nlohmann/json/archive/refs/tags/v3.11.3.zip;5e88795165cc8590138d1f47ce94ee567b85b4d6
microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
onnx;https://github.com/onnx/onnx/archive/7fc2b81a275223f5b02a522d9d2649837542a7be.zip;555338a12903941bb45f57540476244f9ffee17b
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.18.0.zip;f156d032a3af91b66d554e11158b33ca77bbb1f2
# Use the latest commit of 10.9-GA
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/d5dce67db7c2e64b07e055571f5ec06f7f254de2.zip;01114d3b67650857281fa50faa2e412130a63b69
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa
@@ -58,6 +55,6 @@ cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.9.2.zip;b7f8dc4a8
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0c12f53da76d0c31b03b9f0f8ec8f3b4.zip;239063aee4946a9af147b473a4c3da78ba7413b4
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.7.0.zip;d0753d8d5b39947ca0729d7773cb84653a129eb1
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.12.0.zip;7e733cfdc410d777b76122d64232499205589a96
dawn;https://github.com/google/dawn/archive/4cb1f9be152a4fa6bb695c08cd707ab078a1e2fb.zip;de39336b7715f53c14eec61072293b85cc73b691
kleidiai;https://github.com/ARM-software/kleidiai/archive/refs/tags/v1.4.0.tar.gz;22d3b57b54a61c194ab256ff11b0353a3b220244
4 changes: 3 additions & 1 deletion cmake/external/cudnn_frontend.cmake
@@ -6,8 +6,10 @@ onnxruntime_fetchcontent_declare(
EXCLUDE_FROM_ALL
)

set(CUDNN_FRONTEND_SKIP_JSON_LIB OFF CACHE BOOL "" FORCE)
set(CUDNN_FRONTEND_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
set(CUDNN_FRONTEND_BUILD_UNIT_TESTS OFF CACHE BOOL "" FORCE)
set(CUDNN_FRONTEND_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(CUDNN_FRONTEND_BUILD_PYTHON_BINDINGS OFF CACHE BOOL "" FORCE)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})

onnxruntime_fetchcontent_makeavailable(cudnn_frontend)
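The pattern above — forcing options into the CMake cache before onnxruntime_fetchcontent_makeavailable() runs — is how a FetchContent dependency's own option() defaults get overridden. A minimal sketch with a hypothetical dependency and option name (not from this repository):

include(FetchContent)

# Hypothetical dependency; the URL and option name are placeholders.
FetchContent_Declare(mylib URL https://example.com/mylib-1.0.zip)

# Must be set *before* MakeAvailable so it wins over mylib's option() default.
set(MYLIB_BUILD_TESTS OFF CACHE BOOL "" FORCE)

FetchContent_MakeAvailable(mylib)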
4 changes: 0 additions & 4 deletions cmake/onnxruntime_common.cmake
@@ -169,10 +169,6 @@ if(APPLE)
target_link_libraries(onnxruntime_common PRIVATE "-framework Foundation")
endif()

if(MSVC)
target_link_libraries(onnxruntime_common PRIVATE dxcore.lib)
endif()

if(MSVC)
if(onnxruntime_target_platform STREQUAL "ARM64")
set(ARM64 TRUE)
2 changes: 1 addition & 1 deletion cmake/onnxruntime_kernel_explorer.cmake
@@ -64,7 +64,7 @@ elseif (onnxruntime_USE_ROCM)
)
auto_set_source_files_hip_language(${kernel_explorer_kernel_srcs} ${kernel_explorer_rocm_kernel_srcs})
target_sources(kernel_explorer PRIVATE ${kernel_explorer_rocm_kernel_srcs})
target_compile_definitions(kernel_explorer PRIVATE __HIP_PLATFORM_AMD__=1 __HIP_PLATFORM_HCC__=1 HIPBLAS_V2)
target_compile_definitions(kernel_explorer PRIVATE __HIP_PLATFORM_AMD__=1 __HIP_PLATFORM_HCC__=1 HIPBLAS)
if (onnxruntime_USE_COMPOSABLE_KERNEL)
target_compile_definitions(kernel_explorer PRIVATE USE_COMPOSABLE_KERNEL)
if (onnxruntime_USE_COMPOSABLE_KERNEL_CK_TILE)
3 changes: 3 additions & 0 deletions cmake/onnxruntime_mlas.cmake
@@ -281,6 +281,9 @@ function(setup_kleidiai)
${MLAS_SRC_DIR}/kai_ukernel_interface.cpp
)
target_link_libraries(onnxruntime_mlas PRIVATE kleidiai)

list(APPEND onnxruntime_EXTERNAL_LIBRARIES kleidiai)
set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES} PARENT_SCOPE)
endfunction()

if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
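The two added lines are needed because setup_kleidiai() is a CMake function, and functions get their own variable scope: list(APPEND ...) only modifies the function-local copy of onnxruntime_EXTERNAL_LIBRARIES, and the extra set(... PARENT_SCOPE) writes the updated list back to the caller. A minimal sketch with hypothetical names (not from this repository):

set(EXTERNAL_LIBS "liba")

function(append_external_lib)
  list(APPEND EXTERNAL_LIBS "libb")                 # changes only the local copy
  set(EXTERNAL_LIBS ${EXTERNAL_LIBS} PARENT_SCOPE)  # propagate the change to the caller
endfunction()

append_external_lib()
message(STATUS "EXTERNAL_LIBS = ${EXTERNAL_LIBS}")  # prints: liba;libb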
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_dml.cmake
@@ -59,7 +59,7 @@
if (GDK_PLATFORM STREQUAL Scarlett)
target_link_libraries(onnxruntime_providers_dml PRIVATE ${gdk_dx_libs})
else()
target_link_libraries(onnxruntime_providers_dml PRIVATE dxguid.lib d3d12.lib dxgi.lib dxcore.lib)
target_link_libraries(onnxruntime_providers_dml PRIVATE dxguid.lib d3d12.lib dxgi.lib)
endif()

target_link_libraries(onnxruntime_providers_dml PRIVATE delayimp.lib)
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_rocm.cmake
@@ -154,7 +154,7 @@

set_target_properties(onnxruntime_providers_rocm PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers_rocm PROPERTIES FOLDER "ONNXRuntime")
target_compile_definitions(onnxruntime_providers_rocm PRIVATE HIPBLAS_V2)
target_compile_definitions(onnxruntime_providers_rocm PRIVATE HIPBLAS)

if (onnxruntime_ENABLE_TRAINING)
target_include_directories(onnxruntime_providers_rocm PRIVATE ${ORTTRAINING_ROOT} ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining ${MPI_CXX_INCLUDE_DIRS})
11 changes: 4 additions & 7 deletions cmake/onnxruntime_python.cmake
@@ -189,7 +189,10 @@ set(onnxruntime_pybind11_state_static_providers
if(onnxruntime_BUILD_QNN_EP_STATIC_LIB)
list(APPEND onnxruntime_pybind11_state_static_providers PRIVATE onnxruntime_providers_qnn)
endif()

if(WIN32)
# onnxruntime_pybind11_state is a DLL
target_sources(onnxruntime_pybind11_state PRIVATE "${ONNXRUNTIME_ROOT}/core/dll/dllmain.cc")
endif()
target_link_libraries(onnxruntime_pybind11_state PRIVATE
onnxruntime_session
${onnxruntime_libs}
@@ -1064,12 +1067,6 @@ if (onnxruntime_USE_QNN)
${QNN_LIB_FILES}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
)
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:ep_weight_sharing_ctx_gen>
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
)
if (EXISTS "${onnxruntime_QNN_HOME}/Qualcomm AI Hub Proprietary License.pdf")
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
8 changes: 8 additions & 0 deletions cmake/onnxruntime_unittests.cmake
@@ -1334,6 +1334,14 @@ endif()

# shared lib
if (onnxruntime_BUILD_SHARED_LIB)
if(WIN32)
AddTest(DYN
TARGET onnxruntime_shared_lib_dlopen_test
SOURCES ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/dlopen_main.cc
LIBS onnxruntime
DEPENDS ${all_dependencies}
)
endif()
onnxruntime_add_static_library(onnxruntime_mocked_allocator ${TEST_SRC_DIR}/util/test_allocator.cc)
target_include_directories(onnxruntime_mocked_allocator PUBLIC ${TEST_SRC_DIR}/util/include)
target_link_libraries(onnxruntime_mocked_allocator PRIVATE ${GSL_TARGET})
100 changes: 51 additions & 49 deletions cmake/onnxruntime_webassembly.cmake
Expand Up @@ -503,58 +503,60 @@ jsepDownload:_pp_")

set_target_properties(onnxruntime_webassembly PROPERTIES OUTPUT_NAME ${target_name} SUFFIX ".mjs")

#
# The following POST_BUILD script is a workaround for enabling:
# - using onnxruntime-web with Multi-threading enabled when import from CDN
# - using onnxruntime-web when consumed in some frameworks like Vite
#
# In the use case mentioned above, the file name of the script may be changed. So we need to replace the line:
# `new Worker(new URL("ort-wasm-*.mjs", import.meta.url),`
# with
# `new Worker(new URL(import.meta.url),`
#
# This behavior is introduced in https://github.com/emscripten-core/emscripten/pull/22165. Since it's unlikely to be
# reverted, and there is no config to disable this behavior, we have to use a post-build script to workaround it.
#

# Generate a script to do the post-build work
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/wasm_post_build.js "
const fs = require('fs');
const path = require('path');

// node wasm_post_build.js <mjsFilePath>
const mjsFilePath = process.argv[2];
let contents = fs.readFileSync(mjsFilePath).toString();

const regex = 'new Worker\\\\(new URL\\\\(\".+?\", ?import\\\\.meta\\\\.url\\\\),';
const matches = [...contents.matchAll(new RegExp(regex, 'g'))];
if (matches.length !== 1) {
throw new Error(
`Unexpected number of matches for \"${regex}\" in \"${filepath}\": ${matches.length}.`,
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
#
# The following POST_BUILD script is a workaround for enabling:
# - using onnxruntime-web with Multi-threading enabled when import from CDN
# - using onnxruntime-web when consumed in some frameworks like Vite
#
# In the use case mentioned above, the file name of the script may be changed. So we need to replace the line:
# `new Worker(new URL("ort-wasm-*.mjs", import.meta.url),`
# with
# `new Worker(new URL(import.meta.url),`
#
# This behavior is introduced in https://github.com/emscripten-core/emscripten/pull/22165. Since it's unlikely to be
# reverted, and there is no config to disable this behavior, we have to use a post-build script to workaround it.
#

# Generate a script to do the post-build work
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/wasm_post_build.js "
const fs = require('fs');
const path = require('path');

// node wasm_post_build.js <mjsFilePath>
const mjsFilePath = process.argv[2];
let contents = fs.readFileSync(mjsFilePath).toString();

const regex = 'new Worker\\\\(new URL\\\\(\".+?\", ?import\\\\.meta\\\\.url\\\\),';
const matches = [...contents.matchAll(new RegExp(regex, 'g'))];
if (matches.length !== 1) {
throw new Error(
`Unexpected number of matches for \"\${regex}\" in \"\${mjsFilePath}\": \${matches.length}.`,
);
}

// Replace the only occurrence.
contents = contents.replace(
new RegExp(regex),
`new Worker(new URL(import.meta.url),`,
);
}

// Replace the only occurrence.
contents = contents.replace(
new RegExp(regex),
`new Worker(new URL(import.meta.url),`,
);
fs.writeFileSync(mjsFilePath, contents);
"
)

fs.writeFileSync(mjsFilePath, contents);
"
)
find_program(NODE_EXECUTABLE node required)
if (NOT NODE_EXECUTABLE)
message(FATAL_ERROR "Node is required to run the post-build script")
endif()

find_program(NODE_EXECUTABLE node required)
if (NOT NODE_EXECUTABLE)
message(FATAL_ERROR "Node is required to run the post-build script")
add_custom_command(
TARGET onnxruntime_webassembly
POST_BUILD
# Backup file at $<TARGET_FILE_NAME:onnxruntime_webassembly>.bak
COMMAND ${CMAKE_COMMAND} -E copy_if_different "$<TARGET_FILE_NAME:onnxruntime_webassembly>" "$<TARGET_FILE_NAME:onnxruntime_webassembly>.bak"
COMMAND ${CMAKE_COMMAND} -E echo "Performing post-process for $<TARGET_FILE_NAME:onnxruntime_webassembly>"
COMMAND ${NODE_EXECUTABLE} "${CMAKE_CURRENT_BINARY_DIR}/wasm_post_build.js" "$<TARGET_FILE_NAME:onnxruntime_webassembly>"
)
endif()

add_custom_command(
TARGET onnxruntime_webassembly
POST_BUILD
# Backup file at $<TARGET_FILE_NAME:onnxruntime_webassembly>.bak
COMMAND ${CMAKE_COMMAND} -E copy_if_different "$<TARGET_FILE_NAME:onnxruntime_webassembly>" "$<TARGET_FILE_NAME:onnxruntime_webassembly>.bak"
COMMAND ${CMAKE_COMMAND} -E echo "Performing post-process for $<TARGET_FILE_NAME:onnxruntime_webassembly>"
COMMAND ${NODE_EXECUTABLE} "${CMAKE_CURRENT_BINARY_DIR}/wasm_post_build.js" "$<TARGET_FILE_NAME:onnxruntime_webassembly>"
)
endif()