Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ jobs:
run: |
set -e -x
BINARY_SIZE_THRESHOLD_ARGS=""
echo "Binary size threshold in bytes: 1722565"
BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1722565"
echo "Binary size threshold in bytes: 1436672"
BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1436672"

# Ensure ANDROID_NDK_HOME is available and get its real path
if [ -z "$ANDROID_NDK_HOME" ]; then
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/macos-ci-build-and-test-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ jobs:
--build_objc
--build_java
--build_wheel
${{ matrix.target == 'arm64' && '--enable_arm_neon_nchwc' || '' }}
${{ inputs.use_webgpu && '--use_webgpu' || '' }}
${{ inputs.use_xnnpack && '--use_xnnpack' || '' }}
${{ inputs.use_coreml && '--use_coreml --skip_onnx_tests' || '' }}
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/react_native.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
run: sudo apt-get update && sudo apt-get install -y ninja-build

- name: Download Android AAR artifacts
uses: actions/download-artifact@v4
uses: actions/download-artifact@v5
with:
name: onnxruntime-android-full-aar
path: ${{ runner.temp }}/android-full-aar
Expand Down Expand Up @@ -221,7 +221,7 @@ jobs:
uses: actions/checkout@v5

- name: Download iOS pod artifact
uses: actions/download-artifact@v4
uses: actions/download-artifact@v5
with:
name: ios_pod
path: ${{ runner.temp }}/ios_pod
Expand Down Expand Up @@ -277,7 +277,7 @@ jobs:
uses: actions/checkout@v5

- name: Download iOS pod artifact
uses: actions/download-artifact@v4
uses: actions/download-artifact@v5
with:
name: ios_pod
path: ${{ runner.temp }}/ios_pod
Expand Down
113 changes: 63 additions & 50 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,9 @@ option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
option(onnxruntime_USE_SVE "Build with SVE support in MLAS" OFF)
option(onnxruntime_USE_ARM_NEON_NCHWC "Build with ARM Neon NCHWc kernels in MLAS" OFF)

option(onnxruntime_USE_KLEIDIAI "Build with KleidiAI integration in MLAS" OFF)
# iOS simulator build explicitly builds targets with USE_KLEIDIAI=ON so attempting to force override if so
if(APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
message(WARNING "Disabling KleidiAI: not supported on Apple x86_64 platforms")
set(onnxruntime_USE_KLEIDIAI OFF CACHE BOOL "" FORCE)
endif()
option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)
Expand Down Expand Up @@ -258,6 +254,8 @@ if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_
message(FATAL_ERROR "GCC version must be greater than or equal to 11.1")
endif()

include(detect_onnxruntime_target_platform.cmake)

# ENABLE_TRAINING includes all training functionality
# The following 2 entry points
# 1. ORTModule
Expand Down Expand Up @@ -434,14 +432,6 @@ set(ORTTRAINING_SOURCE_DIR ${ORTTRAINING_ROOT}/orttraining)

include(adjust_global_compile_flags.cmake)

if (APPLE)
if (NOT CMAKE_OSX_ARCHITECTURES)
message("Building ONNX Runtime for ${CMAKE_HOST_SYSTEM_PROCESSOR} CPU ARCH")
endif()
elseif (NOT WIN32 AND NOT APPLE)
message("Building ONNX Runtime for ${onnxruntime_target_platform} CPU ARCH")
endif()

# We need to link with libatomic on systems that do not have built-in atomics, or
# don't have built-in support for 8 byte atomics
# Derived from https://github.com/protocolbuffers/protobuf/blob/master/cmake/CMakeLists.txt
Expand Down Expand Up @@ -513,6 +503,66 @@ if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_ENABLE_PYTHON)
endif()
endif()

# Report at configure time that the ARM Neon NCHWc kernels were requested through the
# onnxruntime_USE_ARM_NEON_NCHWC option. This block only logs; it does not validate the target platform.
if (onnxruntime_USE_ARM_NEON_NCHWC)
message(STATUS "Building MLAS with ARM Neon NCHWc kernels")
endif()

# Validate the onnxruntime_USE_SVE request.
# SVE is only kept enabled for Linux aarch64 targets whose C++ compiler accepts
# -march=armv8.2-a+sve. In every other case a warning is emitted and the option is
# switched off for the rest of the configure step (the build does not fail).
if(onnxruntime_USE_SVE)
  if(NOT (LINUX AND onnxruntime_target_platform STREQUAL "aarch64"))
    # Wrong OS or architecture: no point probing the compiler.
    message(WARNING "onnxruntime_USE_SVE was set but it is not supported on this platform. It will be disabled.")
    set(onnxruntime_USE_SVE OFF)
  else()
    # Right platform: make sure the toolchain can actually emit SVE code.
    check_cxx_compiler_flag("-march=armv8.2-a+sve" HAS_ARM64_SVE)
    if(NOT HAS_ARM64_SVE)
      message(WARNING "onnxruntime_USE_SVE was set but compiler does not support SVE. It will be disabled.")
      set(onnxruntime_USE_SVE OFF)
    else()
      message(STATUS "Compiler supports SVE!")
    endif()
  endif()
endif()

if(onnxruntime_USE_KLEIDIAI)
  # is_kleidiai_supported(<out-var>)
  #
  # Stores TRUE in <out-var> (in the caller's scope) when KleidiAI can be built for the
  # current target, FALSE otherwise. Two things are checked:
  #   1. onnxruntime_target_platform names a 64-bit ARM target ("aarch64", "ARM64" or "arm64").
  #   2. For non-MSVC compilers, both the dotprod and i8mm -march extensions are accepted.
  # A warning is printed for every check that fails.
  function(is_kleidiai_supported is_supported_var)
    # Start from "unsupported"; this is overwritten only after every check has passed.
    set(${is_supported_var} FALSE PARENT_SCOPE)

    # Target platform gate.
    if(NOT onnxruntime_target_platform STREQUAL "aarch64" AND
       NOT onnxruntime_target_platform STREQUAL "ARM64" AND
       NOT onnxruntime_target_platform STREQUAL "arm64")
      message(WARNING "KleidiAI is not supported on this platform.")
      return()
    endif()

    # Compiler capability gate.
    # TODO detect on MSVC (no probing is done there; support is assumed).
    if(NOT MSVC)
      check_cxx_compiler_flag(-march=armv8.2-a+dotprod HAS_ARM64_DOTPROD)
      check_cxx_compiler_flag(-march=armv8.2-a+i8mm HAS_ARM64_I8MM)

      # Report each missing extension separately so the user sees the full picture.
      if(NOT HAS_ARM64_DOTPROD)
        message(WARNING "The compiler doesn't support dotprod instructions.")
      endif()
      if(NOT HAS_ARM64_I8MM)
        message(WARNING "The compiler doesn't support i8mm instructions.")
      endif()

      if(NOT (HAS_ARM64_DOTPROD AND HAS_ARM64_I8MM))
        return()
      endif()
    endif()

    set(${is_supported_var} TRUE PARENT_SCOPE)
  endfunction()

  is_kleidiai_supported(is_kleidiai_supported_result)

  # Downgrade gracefully instead of failing the configure step.
  if(NOT is_kleidiai_supported_result)
    message(WARNING "onnxruntime_USE_KLEIDIAI was set but it is not supported. It will be disabled.")
    set(onnxruntime_USE_KLEIDIAI OFF)
  endif()
endif()

#Dependencies begin
get_filename_component(ONNXRUNTIME_ROOT "${ONNXRUNTIME_ROOT}" ABSOLUTE)
get_filename_component(ORTTRAINING_ROOT "${ORTTRAINING_ROOT}" ABSOLUTE)
Expand Down Expand Up @@ -663,43 +713,6 @@ else()
endif()
endif()

if(onnxruntime_USE_SVE)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
check_cxx_compiler_flag("-march=armv8.2-a+sve" HAS_ARM64_SVE)
if(HAS_ARM64_SVE)
message(STATUS "Compiler supports SVE!")
else()
message(WARNING "onnxruntime_USE_SVE was set but compiler does not support SVE. It will be disabled.")
set(onnxruntime_USE_SVE OFF)
endif()
else()
message(WARNING "onnxruntime_USE_SVE was set but it is not supported on this platform. It will be disabled.")
set(onnxruntime_USE_SVE OFF)
endif()
endif()

if (onnxruntime_USE_KLEIDIAI AND (
(onnxruntime_target_platform STREQUAL "aarch64") OR
(onnxruntime_target_platform STREQUAL "ARM64") OR
(APPLE AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")))

# TODO Add checks for MSVC Compilation
if(NOT MSVC)
check_cxx_compiler_flag(-march=armv8.2-a+dotprod HAS_ARM64_DOTPROD)
check_cxx_compiler_flag(-march=armv8.2-a+i8mm HAS_ARM64_I8MM)
if (NOT HAS_ARM64_DOTPROD)
message(FATAL_ERROR "The compiler doesn't support dotprod")
endif()
if (NOT HAS_ARM64_I8MM)
message(FATAL_ERROR "The compiler doesn't support i8mm")
endif()
else()
message(STATUS "Skipping -march= checks on MSVC (not supported), assuming dotprod/i8mm support manually.")
set(HAS_ARM64_DOTPROD TRUE)
set(HAS_ARM64_I8MM TRUE)
endif()
endif()

#names in this var must match the directory names under onnxruntime/core/providers
#ONNXRUNTIME_PROVIDER_NAMES is the list of providers that needs to export additional symbols in the global namespace.
#For example CUDA EP exports "OrtSessionOptionsAppendExecutionProvider_CUDA", which is a global function.
Expand Down
60 changes: 11 additions & 49 deletions cmake/adjust_global_compile_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -217,30 +217,20 @@ endmacro()
#Set global compile flags for all the source code(including third_party code like protobuf)
#This section must be before any add_subdirectory, otherwise build may fail because /MD,/MT mismatch
if (MSVC)
if (CMAKE_VS_PLATFORM_NAME)
# Multi-platform generator
set(onnxruntime_target_platform ${CMAKE_VS_PLATFORM_NAME})
else()
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
endif()
if (onnxruntime_target_platform STREQUAL "ARM64")
set(onnxruntime_target_platform "ARM64")
enable_language(ASM_MARMASM)
elseif (onnxruntime_target_platform STREQUAL "ARM64EC")
if (onnxruntime_target_platform STREQUAL "ARM64" OR
onnxruntime_target_platform STREQUAL "ARM64EC" OR
onnxruntime_target_platform STREQUAL "ARM")
enable_language(ASM_MARMASM)
elseif (onnxruntime_target_platform STREQUAL "ARM" OR CMAKE_GENERATOR MATCHES "ARM")
set(onnxruntime_target_platform "ARM")
enable_language(ASM_MARMASM)
elseif (onnxruntime_target_platform STREQUAL "x64" OR onnxruntime_target_platform STREQUAL "x86_64" OR onnxruntime_target_platform STREQUAL "AMD64" OR CMAKE_GENERATOR MATCHES "Win64")
set(onnxruntime_target_platform "x64")
enable_language(ASM_MASM)
elseif (onnxruntime_target_platform STREQUAL "Win32" OR onnxruntime_target_platform STREQUAL "x86" OR onnxruntime_target_platform STREQUAL "i386" OR onnxruntime_target_platform STREQUAL "i686")
set(onnxruntime_target_platform "x86")
elseif (onnxruntime_target_platform STREQUAL "x64" OR
onnxruntime_target_platform STREQUAL "x86")
enable_language(ASM_MASM)
message("Enabling SAFESEH for x86 build")
set(CMAKE_ASM_MASM_FLAGS "${CMAKE_ASM_MASM_FLAGS} /safeseh")

if (onnxruntime_target_platform STREQUAL "x86")
message("Enabling SAFESEH for x86 build")
set(CMAKE_ASM_MASM_FLAGS "${CMAKE_ASM_MASM_FLAGS} /safeseh")
endif()
else()
message(FATAL_ERROR "Unknown CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(FATAL_ERROR "Unsupported onnxruntime_target_platform value: ${onnxruntime_target_platform}")
endif()

#Always enable exception handling, even for Windows ARM
Expand Down Expand Up @@ -269,34 +259,6 @@ if (MSVC)
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Gw /GL")
endif()
else()
if (NOT APPLE)
#XXX: Sometimes the value of CMAKE_SYSTEM_PROCESSOR is set but it's wrong. For example, if you run an armv7 docker
#image on an aarch64 machine with an aarch64 Ubuntu host OS, in the docker instance cmake may still report
# CMAKE_SYSTEM_PROCESSOR as aarch64 by default. Given compiling this code may need more than 2GB memory, we do not
# support compiling for ARM32 natively(only support cross-compiling), we will ignore this issue for now.
if(NOT CMAKE_SYSTEM_PROCESSOR)
message(WARNING "CMAKE_SYSTEM_PROCESSOR is not set. Please set it in your toolchain cmake file.")
# Try to detect it
if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
execute_process(
COMMAND "${CMAKE_C_COMPILER}" -dumpmachine
OUTPUT_VARIABLE GCC_DUMP_MACHINE_OUT OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_VARIABLE _err
RESULT_VARIABLE _res
)
if(NOT _res EQUAL 0)
message(SEND_ERROR "Failed to run 'gcc -dumpmachine':\n ${_res}")
endif()
string(REPLACE "-" ";" GCC_DUMP_MACHINE_OUT_LIST "${GCC_DUMP_MACHINE_OUT}")
list(LENGTH GCC_DUMP_MACHINE_OUT_LIST GCC_TRIPLET_LEN)
if(GCC_TRIPLET_LEN EQUAL 4)
list(GET GCC_DUMP_MACHINE_OUT_LIST 0 CMAKE_SYSTEM_PROCESSOR)
message("Setting CMAKE_SYSTEM_PROCESSOR to ${CMAKE_SYSTEM_PROCESSOR}")
endif()
endif()
endif()
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
endif()
if (onnxruntime_BUILD_FOR_NATIVE_MACHINE)
string(APPEND CMAKE_CXX_FLAGS " -march=native -mtune=native")
string(APPEND CMAKE_C_FLAGS " -march=native -mtune=native")
Expand Down
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/de0ce7c7251372892e53c
re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.9.2.zip;b7f8dc4a879765127ce31dfeabd31c556c80ec79
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v4.2.1.zip;5d2b21b10478556c5e209dd7229e298a5c9f0b02
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0c12f53da76d0c31b03b9f0f8ec8f3b4.zip;239063aee4946a9af147b473a4c3da78ba7413b4
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.12.0.zip;7e733cfdc410d777b76122d64232499205589a96
Expand Down
80 changes: 80 additions & 0 deletions cmake/detect_onnxruntime_target_platform.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Sets the onnxruntime_target_platform variable, which identifies the platform
# (CPU architecture) to compile for.
#
# Resulting values:
#   - MSVC:   one of "ARM64", "ARM64EC", "ARM", "x64", "x86" (normalized from
#             CMAKE_VS_PLATFORM_NAME, CMAKE_SYSTEM_PROCESSOR or the generator name).
#             An unrecognized platform is a fatal error.
#   - Apple:  the single entry of CMAKE_OSX_ARCHITECTURES, or CMAKE_SYSTEM_PROCESSOR when
#             CMAKE_OSX_ARCHITECTURES is empty/undefined.
#   - Others: CMAKE_SYSTEM_PROCESSOR (detected from the compiler if it is not set).
#
# The variable is left unset when no single architecture can be determined, e.g. an Apple
# build with several entries in CMAKE_OSX_ARCHITECTURES. Callers must handle that case.
#
# Everything runs inside block() so that only onnxruntime_target_platform leaks into the
# including scope; all helper variables stay local.
block(PROPAGATE onnxruntime_target_platform)

  unset(onnxruntime_target_platform)

  if (MSVC)
    if (CMAKE_VS_PLATFORM_NAME)
      # Multi-platform generator
      set(onnxruntime_target_platform ${CMAKE_VS_PLATFORM_NAME})
    else()
      set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
    endif()

    # Normalize the many spellings of each architecture to one canonical name.
    if (onnxruntime_target_platform STREQUAL "ARM64" OR
        onnxruntime_target_platform STREQUAL "ARM64EC")
      # Do nothing. We'll just use the current value of onnxruntime_target_platform.
    elseif (onnxruntime_target_platform STREQUAL "ARM" OR
            CMAKE_GENERATOR MATCHES "ARM")
      set(onnxruntime_target_platform "ARM")
    elseif (onnxruntime_target_platform STREQUAL "x64" OR
            onnxruntime_target_platform STREQUAL "x86_64" OR
            onnxruntime_target_platform STREQUAL "AMD64" OR
            CMAKE_GENERATOR MATCHES "Win64")
      set(onnxruntime_target_platform "x64")
    elseif (onnxruntime_target_platform STREQUAL "Win32" OR
            onnxruntime_target_platform STREQUAL "x86" OR
            onnxruntime_target_platform STREQUAL "i386" OR
            onnxruntime_target_platform STREQUAL "i686")
      set(onnxruntime_target_platform "x86")
    else()
      message(FATAL_ERROR "Unknown target platform: ${onnxruntime_target_platform}")
    endif()
  elseif(APPLE)
    # CMAKE_OSX_ARCHITECTURES can be defined but empty (CMake creates it as a cache entry on
    # Apple platforms), so branch on the number of entries rather than on if(DEFINED ...).
    # list(LENGTH) yields 0 for an undefined or empty variable.
    list(LENGTH CMAKE_OSX_ARCHITECTURES CMAKE_OSX_ARCHITECTURES_LEN)
    if(CMAKE_OSX_ARCHITECTURES_LEN EQUAL 0)
      # No explicit architecture requested: fall back to the system processor.
      set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
    elseif(CMAKE_OSX_ARCHITECTURES_LEN EQUAL 1)
      set(onnxruntime_target_platform ${CMAKE_OSX_ARCHITECTURES})
    endif()
    # With more than one architecture (universal build) onnxruntime_target_platform is
    # intentionally left unset.
  else()
    #XXX: Sometimes the value of CMAKE_SYSTEM_PROCESSOR is set but it's wrong. For example, if you run an armv7 docker
    # image on an aarch64 machine with an aarch64 Ubuntu host OS, in the docker instance cmake may still report
    # CMAKE_SYSTEM_PROCESSOR as aarch64 by default. Given compiling this code may need more than 2GB memory, we do not
    # support compiling for ARM32 natively (only support cross-compiling), we will ignore this issue for now.
    if(NOT CMAKE_SYSTEM_PROCESSOR)
      message(WARNING "CMAKE_SYSTEM_PROCESSOR is not set. Please set it in your toolchain cmake file.")
      # Try to detect it from the compiler's target triplet.
      if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
        execute_process(
          COMMAND "${CMAKE_C_COMPILER}" -dumpmachine
          OUTPUT_VARIABLE GCC_DUMP_MACHINE_OUT
          OUTPUT_STRIP_TRAILING_WHITESPACE
          ERROR_VARIABLE _err
          RESULT_VARIABLE _res
        )
        if(NOT _res EQUAL 0)
          message(SEND_ERROR "Failed to run 'gcc -dumpmachine':\n ${_res}")
        endif()
        # Split "<arch>-<vendor>-<os>-<abi>" on '-' and take the first component as the processor.
        # Only four-component output is accepted; anything else leaves the processor undetected.
        string(REPLACE "-" ";" GCC_DUMP_MACHINE_OUT_LIST "${GCC_DUMP_MACHINE_OUT}")
        list(LENGTH GCC_DUMP_MACHINE_OUT_LIST GCC_TRIPLET_LEN)
        if(GCC_TRIPLET_LEN EQUAL 4)
          # Note: this assignment is local to the enclosing block(); only
          # onnxruntime_target_platform is propagated to the including scope.
          list(GET GCC_DUMP_MACHINE_OUT_LIST 0 CMAKE_SYSTEM_PROCESSOR)
          message("Setting CMAKE_SYSTEM_PROCESSOR to ${CMAKE_SYSTEM_PROCESSOR}")
        endif()
      endif()
    endif()
    set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
  endif()

  if(DEFINED onnxruntime_target_platform)
    message(STATUS "onnxruntime_target_platform = ${onnxruntime_target_platform}")
  else()
    message(WARNING "onnxruntime_target_platform is not set")
  endif()

endblock()
1 change: 1 addition & 0 deletions cmake/external/cutlass.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ onnxruntime_fetchcontent_declare(
URL ${DEP_URL_cutlass}
URL_HASH SHA1=${DEP_SHA1_cutlass}
EXCLUDE_FROM_ALL
PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1_maybe_unused.patch
)

FetchContent_GetProperties(cutlass)
Expand Down
Loading
Loading