Merged · 62 commits
e6fc589
[QNN EP] Transform SFIXED to UFIXED on InstanceNorm scale (#24952)
quic-hungjuiw Jun 11, 2025
0e027fe
[webgpu] Bump version of Dawn to 991810463a (#25011)
fs-eire Jun 11, 2025
ce503d7
Upgrade QNN to 2.35.0 (#25002)
qti-jkilpatrick Jun 11, 2025
0e4bf97
Fix illegal update model opset version in static_quantize_runner (#24…
zhaoxul-qti Jun 11, 2025
9f0d989
Add Whisper JSON files for ONNX Runtime GenAI (#25032)
kunal-vaishnavi Jun 11, 2025
7c7b6e0
[QNN EP] Enable boolean support for expand op (#25016)
qti-yuduo Jun 11, 2025
11f0a0a
Convert graph initializers into OrtValue Phase I (#23979)
yuslepukhin Jun 11, 2025
2ea4ea4
Fix how Whisper tokenizer is loaded and saved (#25037)
kunal-vaishnavi Jun 12, 2025
70f14d7
[CUDA] Suppress MSVC build warnings for sm=90 (#25020)
tianleiwu Jun 12, 2025
a643acf
[QNN EP] Add fixing dynamic input shapes in qnn.preprocess (#25024)
huaychou Jun 12, 2025
2b43536
[test] add retry to onnx node test data download (#25034)
fs-eire Jun 12, 2025
30c5f05
Add Paged Attention Op for CUDA SM80 support (#24595)
aciddelgado Jun 12, 2025
3b855e1
Support 8 bit weights "unpacked" compute mode in MatmulNBits kernel (…
hariharans29 Jun 12, 2025
d8096f4
[WebNN] Fix bug in getting the first input name of a WebNN op (#25027)
Honry Jun 12, 2025
49e7717
Refactor the cmake code of onnxruntime_shared_lib_dlopen_test (#25031)
snnn Jun 12, 2025
2c29984
[WebNN] Expose Module.webnnRegisterMLContext to EM_ASM block (#25022)
Honry Jun 12, 2025
ab61dc4
Add paralell to win-ci.yml (#25039)
snnn Jun 12, 2025
a5494ed
Make OrtDevice generic to support plugin EP usage. (#24985)
skottmckay Jun 13, 2025
9ffc650
[WebGPU-EP] Disable SubgroupMatrix uniformity checks (#25028)
jchen10 Jun 13, 2025
82fddd7
Cherry pick telemetry changes from win-onnxruntime (#24957)
ashrit-ms Jun 13, 2025
0784e0a
Revert "Improve Windows ETW callback registration and fix issues" (#2…
tianleiwu Jun 13, 2025
e7c9a6c
[webgpu] make DP4AMatMulNBitsSmallMProgram shader template (#25025)
jing-bao Jun 13, 2025
b3c316f
[CUDA] Fix clip operator (#25057)
tianleiwu Jun 14, 2025
82c1bf9
[webgpu] support intel subgroup matrix on matmul_nbits (#24898)
xhcao Jun 14, 2025
ece0c1e
Disable RMSNorm in onnx_backend_test.py and Topk(uint_64) (#25056)
titaiwangms Jun 16, 2025
089c52e
Add python bindings to the global thread pool functionality (#24238)
khoover Jun 16, 2025
2594b47
Fix some `OrtStatus*` leaks (#25036)
edgchen1 Jun 16, 2025
53f3c6c
[WebGPU EP] adjusts pow op to use sqrt when exponent is 0.5 (#25026)
prathikr Jun 16, 2025
7572970
Fix CANN build error (#25075)
bachelor-dou Jun 16, 2025
2df3983
Skip topk uint64 test added from ONNX 1.18 (#25084)
titaiwangms Jun 16, 2025
20b2247
vitisai/imp/graph.cc: fix graph_save() for models with external data …
schuermans-slx Jun 16, 2025
796ad45
[webgpu] Add zero points support for dp4 path (#24675)
qjia7 Jun 17, 2025
7695233
Add version check for IAlloactor::GetStats (#24989)
toothache Jun 17, 2025
0699df6
[webgpu] Bump version of Dawn to 9733be39e (#25071)
fs-eire Jun 17, 2025
3eeff82
[WebGPU EP] fixes bugs in NCHW version of instance norm operator (#25…
prathikr Jun 18, 2025
13c0631
fix webgpu linux build on gcc 13.3 (#25099)
guschmue Jun 18, 2025
0385779
[QNN EP] Add Support for Reciprocal Op in QNN EP (#25035)
quic-ashwshan Jun 19, 2025
0ef8213
Add GetCapability/Compile infrastructure for EP ABI (#24887)
adrianlizarraga Jun 19, 2025
6413908
Skip hash computation for EPContext models (#25106)
ashrit-ms Jun 19, 2025
74126d1
Fix C/C++ documentation generation (#25112)
adrianlizarraga Jun 19, 2025
dbfbebe
[WebNN] Handle in-memory external data (#25079)
Honry Jun 19, 2025
2c041e0
use WebGPU EP instead of JSEP in WebAssembly (#24856)
fs-eire Jun 19, 2025
bc95e99
Update abseil (#25033)
snnn Jun 20, 2025
e0a4ed1
Use model hash from metadata if available (#25118)
ashrit-ms Jun 20, 2025
02aa881
[wasm] Optimize WASM relaxed simd MlasGemmQuantKernel (#25048)
fanchenkong1 Jun 20, 2025
7268117
[CUDA] FpA IntB Gemm Kernel Test (#25109)
tianleiwu Jun 20, 2025
ae628b9
Fix: delay CUDADriverWrapper instantiation to avoid uncaught exceptio…
krahenbuhl Jun 20, 2025
c98a0e0
update CMAKE_CUDA_ARCHITECTURES for packaging pipelines (#25126)
tianleiwu Jun 21, 2025
b7b1af4
[QNN EP] Add remove_initializer_from_input in qnn.preprocess (#25102)
quic-hungjuiw Jun 23, 2025
a0c4236
[NV TensorRT RTX EP] enable weight stripped engines with EP Context (…
thevishalagarwal Jun 23, 2025
be0292f
[jsep-webgpu] Add kernel profiling start time in logging (#25132)
daijh Jun 23, 2025
ab4d85c
[TRT RTX EP] fix build error (#25094)
thevishalagarwal Jun 23, 2025
986b66a
[QNN EP] Add QNN EP UDO support (#25076)
chenweng-quic Jun 23, 2025
15f6bde
[webgpu] Make the GQA's intermediate buffer static (#25091)
qjia7 Jun 24, 2025
a96e7c9
[QNN-EP] Fix the input order in gen_qnn_ctx_onn_model.py (#25090)
chuteng-quic Jun 24, 2025
4e45b29
[wasm] Optimize WASM SIMD MlasGemmQuantKernel (#25136)
fanchenkong1 Jun 24, 2025
3a47bd2
Fix default value for Whisper's buffer sharing (#25151)
kunal-vaishnavi Jun 24, 2025
7402b6c
Enable warnings in ONNX build through onnx.patch and vcpkg binskim.pa…
titaiwangms Jun 24, 2025
6cffd1a
Add RotaryEmbeddings(23) - CPU (#24980)
titaiwangms Jun 25, 2025
f80e6f4
Enable VTCM Back Up Buffer Sharing (#24962)
quic-calvnguy Jun 25, 2025
7d22c09
[EP ABI] Add support for creating EP Context models. (#25124)
adrianlizarraga Jun 25, 2025
7176c12
Merge branch 'master' into sync_msft_25_6_25
ankitm3k Jun 25, 2025
5 changes: 2 additions & 3 deletions .github/workflows/linux-wasm-ci-build-and-test-workflow.yml
@@ -92,7 +92,6 @@ jobs:
${{ env.common_build_args }} \
--build_dir ${{ github.workspace }}/build/wasm_inferencing_webgpu \
--use_webgpu \
--use_jsep \
--use_webnn \
--target onnxruntime_webassembly \
--skip_tests
@@ -113,8 +112,8 @@
if: ${{ inputs.skip_publish != true && inputs.build_webgpu == true }}
run: |
mkdir -p ${{ github.workspace }}/artifacts/wasm_webgpu/
cp ${{ github.workspace }}/build/wasm_inferencing_webgpu/${{ inputs.build_config }}/ort-wasm-simd-threaded.jsep.wasm ${{ github.workspace }}/artifacts/wasm_webgpu/
cp ${{ github.workspace }}/build/wasm_inferencing_webgpu/${{ inputs.build_config }}/ort-wasm-simd-threaded.jsep.mjs ${{ github.workspace }}/artifacts/wasm_webgpu/
cp ${{ github.workspace }}/build/wasm_inferencing_webgpu/${{ inputs.build_config }}/ort-wasm-simd-threaded.asyncify.wasm ${{ github.workspace }}/artifacts/wasm_webgpu/
cp ${{ github.workspace }}/build/wasm_inferencing_webgpu/${{ inputs.build_config }}/ort-wasm-simd-threaded.asyncify.mjs ${{ github.workspace }}/artifacts/wasm_webgpu/

- name: Upload WASM artifacts
if: ${{ inputs.skip_publish != true }}
55 changes: 18 additions & 37 deletions .github/workflows/windows-web-ci-workflow.yml
@@ -16,9 +16,6 @@ on:
package_name:
type: string
default: "NPM_packages"
run_webgpu_tests:
type: boolean
default: true

jobs:
build_onnxruntime_web:
@@ -86,6 +83,22 @@ jobs:
run: |
copy ${{ github.workspace }}\artifacts_wasm\ort-*.mjs ${{ github.workspace }}\js\web\dist\

- name: Download WebAssembly WebGPU artifacts
uses: actions/download-artifact@v4
with:
name: ${{ inputs.build_config }}_wasm_webgpu
path: ${{ github.workspace }}/artifacts_wasm_webgpu

- name: Binplace dist files (.wasm) for WebGPU
shell: cmd
run: |
copy ${{ github.workspace }}\artifacts_wasm_webgpu\ort-*.wasm ${{ github.workspace }}\js\web\dist\

- name: Binplace dist files (.mjs) for WebGPU
shell: cmd
run: |
copy ${{ github.workspace }}\artifacts_wasm_webgpu\ort-*.mjs ${{ github.workspace }}\js\web\dist\

- name: npm ci for /js/
run: npm ci
working-directory: ${{ github.workspace }}/js
@@ -115,17 +128,7 @@
run: |
Get-WmiObject Win32_Process -Filter "name = 'chrome.exe'" | Format-List CommandLine

- name: Run ort-web tests (wasm,webgl backend)
if: ${{ inputs.run_webgpu_tests != true }}
shell: cmd
run: |
mkdir ${{ runner.temp }}\web\test\01
dir ${{ runner.temp }}\web\test\01
npm test -- -e=chrome -b=webgl,wasm --user-data-dir=${{ runner.temp }}\web\test\01 --chromium-flags=--enable-logging --chromium-flags=--v=1
working-directory: ${{ github.workspace }}\js\web

- name: Run ort-web tests (ALL backends)
if: ${{ inputs.run_webgpu_tests == true }}
shell: cmd
run: |
mkdir ${{ runner.temp }}\web\test\02
@@ -134,7 +137,6 @@
working-directory: ${{ github.workspace }}\js\web

- name: Run ort-web tests (Suite1, webgpu, IO-binding=gpu-tensor)
if: ${{ inputs.run_webgpu_tests == true }}
shell: cmd
run: |
mkdir ${{ runner.temp }}\web\test\03
@@ -143,7 +145,6 @@
working-directory: ${{ github.workspace }}\js\web

- name: Run ort-web tests (Suite1, webgpu, IO-binding=gpu-location)
if: ${{ inputs.run_webgpu_tests == true }}
shell: cmd
run: |
mkdir ${{ runner.temp }}\web\test\04
@@ -169,27 +170,7 @@
working-directory: ${{ github.workspace }}\js\web

# WebGPU EP tests
- name: Download WebAssembly WebGPU artifacts
if: ${{ inputs.run_webgpu_tests == true }}
uses: actions/download-artifact@v4
with:
name: ${{ inputs.build_config }}_wasm_webgpu
path: ${{ github.workspace }}/artifacts_wasm_webgpu

- name: Binplace dist files (.wasm) for WebGPU
if: ${{ inputs.run_webgpu_tests == true }}
shell: cmd
run: |
copy /Y ${{ github.workspace }}\artifacts_wasm_webgpu\ort-*.wasm ${{ github.workspace }}\js\web\dist\

- name: Binplace dist files (.mjs) for WebGPU
if: ${{ inputs.run_webgpu_tests == true }}
shell: cmd
run: |
copy /Y ${{ github.workspace }}\artifacts_wasm_webgpu\ort-*.mjs ${{ github.workspace }}\js\web\dist\

- name: Run ort-web tests - WebGPU EP
if: ${{ inputs.run_webgpu_tests == true }}
continue-on-error: true
shell: cmd
run: |
@@ -199,15 +180,15 @@
working-directory: ${{ github.workspace }}\js\web

- name: Validate shader keys - WebGPU EP
if: ${{ inputs.run_webgpu_tests == true && inputs.build_config == 'Debug' }}
if: ${{ inputs.build_config == 'Debug' }}
uses: ./.github/actions/webgpu-validate-shader-key
with:
log_file_path: ${{ runner.temp }}\web\test\07\chrome_debug.log
is_chromium_log: true

# this step is added to help investigate the shader validation failure which is hard to reproduce
- name: Upload WebGPU shader validation log on failure
if: ${{ failure() && inputs.run_webgpu_tests == true && inputs.build_config == 'Debug' }}
if: ${{ failure() && inputs.build_config == 'Debug' }}
uses: actions/upload-artifact@v4
with:
name: webgpu-shader-validation-logs
2 changes: 1 addition & 1 deletion .github/workflows/windows_x86.yml
@@ -87,7 +87,7 @@ jobs:
- name: Build and Test
shell: pwsh
run: |
python.exe "${{ github.workspace }}\tools\ci_build\build.py" --config RelWithDebInfo --build_dir "${{ github.workspace }}\build" --skip_submodule_sync --build_csharp --parallel --use_binskim_compliant_compile_flags --cmake_generator "Visual Studio 17 2022" --build_shared_lib --enable_onnx_tests --build_wheel --msbuild_extra_options "IncludeMobileTargets=false" --build_nuget --use_vcpkg --use_vcpkg_ms_internal_asset_cache
python.exe "${{ github.workspace }}\tools\ci_build\build.py" --config RelWithDebInfo --build_dir "${{ github.workspace }}\build" --skip_submodule_sync --build_csharp --parallel --use_binskim_compliant_compile_flags --cmake_generator "Visual Studio 17 2022" --build_shared_lib --enable_onnx_tests --build_wheel --msbuild_extra_options "IncludeMobileTargets=false" --build_nuget --compile_no_warning_as_error --use_vcpkg --use_vcpkg_ms_internal_asset_cache
if ($LASTEXITCODE -ne 0) {
exit $LASTEXITCODE
}
8 changes: 6 additions & 2 deletions cmake/CMakeLists.txt
@@ -372,7 +372,7 @@ if (onnxruntime_USE_ROCM)
if (HIPIFY_PERL_PATH-NOTFOUND)
MESSAGE(FATAL_ERROR "hipify-perl not found")
endif()
MESSAGE("HIPIFY PATH:"${HIPIFY_PERL_PATH}/hipify-perl)
MESSAGE("HIPIFY PATH: ${HIPIFY_PERL_PATH}/hipify-perl")
set(onnxruntime_HIPIFY_PERL ${HIPIFY_PERL_PATH}/hipify-perl)
endif()

@@ -1336,7 +1336,7 @@ function(onnxruntime_configure_target target_name)
if(WIN32 AND onnxruntime_ENABLE_STATIC_ANALYSIS AND onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES)
set_target_properties(${target_name} PROPERTIES VS_USER_PROPS ${PROJECT_SOURCE_DIR}/EnableVisualStudioCodeAnalysis.props)
endif()
target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${abseil_cpp_SOURCE_DIR})
target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT})
if (onnxruntime_ENABLE_TRAINING_OPS)
target_include_directories(${target_name} PRIVATE ${ORTTRAINING_ROOT})
endif()
@@ -1669,6 +1669,10 @@ if (onnxruntime_ENABLE_DLPACK)
add_compile_definitions(ENABLE_DLPACK)
endif()

if (onnxruntime_CALLER_FRAMEWORK)
add_definitions(-DORT_CALLER_FRAMEWORK="${onnxruntime_CALLER_FRAMEWORK}")
endif()

if (UNIX OR onnxruntime_USE_NCCL)
# Find NCCL
if (onnxruntime_USE_NCCL)
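The `ORT_CALLER_FRAMEWORK` definition added in the hunk above becomes a quoted string literal visible to every translation unit. A minimal sketch of how such a definition can be consumed on the C++ side (the `CallerFramework` helper and the `"unset"` fallback are illustrative, not part of the ONNX Runtime API):

```cpp
#include <cassert>
#include <string>

// When onnxruntime_CALLER_FRAMEWORK is set, CMake injects something like:
//   -DORT_CALLER_FRAMEWORK="MyApp"
// Provide a fallback so this sketch also compiles without that flag.
#ifndef ORT_CALLER_FRAMEWORK
#define ORT_CALLER_FRAMEWORK "unset"
#endif

// Illustrative helper: expose the configure-time value at runtime,
// e.g. for logging or telemetry tagging.
const char* CallerFramework() { return ORT_CALLER_FRAMEWORK; }
```

Because the value is baked in at compile time via `add_definitions`, it applies globally to the build rather than to a single target.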
4 changes: 2 additions & 2 deletions cmake/deps.txt
@@ -9,7 +9,7 @@
#since the file contains a version string: "lts_20230802". However, the file is for debugging purposes only and would
#not affect built binaries.
#
abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20240722.0.zip;36ee53eb1466fb6e593fc5c286680de31f8a494a
abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20250512.0.zip;3d6ff7e7ce144d9a53a53bef1f1bf79e1da4b8e1
coremltools;https://github.com/apple/coremltools/archive/refs/tags/7.1.zip;f1bab0f30966f2e217d8e01207d518f230a1641a
cxxopts;https://github.com/jarro2783/cxxopts/archive/3c73d91c0b04e2b59462f0a741be8c07024c1bc0.zip;6c6ca7f8480b26c8d00476e0e24b7184717fe4f0
date;https://github.com/HowardHinnant/date/archive/refs/tags/v3.0.1.zip;2dac0c81dc54ebdd8f8d073a75c053b04b56e159
@@ -56,5 +56,5 @@ extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.12.0.zip;7e733cfdc410d777b76122d64232499205589a96
dawn;https://github.com/google/dawn/archive/4cb1f9be152a4fa6bb695c08cd707ab078a1e2fb.zip;de39336b7715f53c14eec61072293b85cc73b691
dawn;https://github.com/google/dawn/archive/9733be39e18186961d503e064874afe3e9ceb8d1.zip;2a4017c32892b90d072a9102eba90ae691fae36d
kleidiai;https://github.com/ARM-software/kleidiai/archive/refs/tags/v1.4.0.tar.gz;22d3b57b54a61c194ab256ff11b0353a3b220244
4 changes: 2 additions & 2 deletions cmake/external/abseil-cpp.cmake
@@ -27,7 +27,7 @@ else()
endif()

# NB! Advancing Abseil version changes its internal namespace,
# currently absl::lts_20240116 which affects abseil-cpp.natvis debugger
# currently absl::lts_20250512 which affects abseil-cpp.natvis debugger
# visualization file, that must be adjusted accordingly, unless we eliminate
# that namespace at build time.
onnxruntime_fetchcontent_declare(
Expand All @@ -36,7 +36,7 @@ onnxruntime_fetchcontent_declare(
URL_HASH SHA1=${DEP_SHA1_abseil_cpp}
EXCLUDE_FROM_ALL
PATCH_COMMAND ${ABSL_PATCH_COMMAND}
FIND_PACKAGE_ARGS 20240722 NAMES absl
FIND_PACKAGE_ARGS 20250512 NAMES absl
)

onnxruntime_fetchcontent_makeavailable(abseil_cpp)
10 changes: 5 additions & 5 deletions cmake/external/abseil-cpp.natvis
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<Type Name="absl::lts_20240722::InlinedVector&lt;*&gt;">
<Type Name="absl::lts_20250512::InlinedVector&lt;*&gt;">
<Intrinsic Name="_size" Expression="storage_.metadata_.value >> 1"/>
<Intrinsic Name="_is_allocated" Expression="(storage_.metadata_.value &amp; 1) == 1"/>
<Intrinsic Name="_inlined_data" Expression="($T1*)storage_.data_.inlined.inlined_data"/>
@@ -24,7 +24,7 @@
</Expand>
</Type>
<!-- Should handle both flat hash_set and hash_map -->
<Type Name="absl::lts_20240116::container_internal::raw_hash_set&lt;*&gt;">
<Type Name="absl::lts_20250512::container_internal::raw_hash_set&lt;*&gt;">
<Intrinsic Name="_commonfields" Expression="settings_.value"/>
<Intrinsic Name="_size" Expression="settings_.value.compressed_tuple_.value"/>
<Intrinsic Name="_capacity" Expression="_commonfields().capacity_"/>
@@ -51,7 +51,7 @@
</Type>

<!-- Primitive types stored as a value -->
<Type Name="absl::lts_20240116::container_internal::Storage&lt;*,*,0&gt;">
<Type Name="absl::lts_20250512::container_internal::Storage&lt;*,*,0&gt;">
<DisplayString IncludeView="noparens">*($T1 *){value}</DisplayString>
<DisplayString ExcludeView="noparens">(*($T1 *){value})</DisplayString>
<Expand>
@@ -60,15 +60,15 @@
</Type>

<!-- For storage inherited from the type -->
<Type Name="absl::lts_20240116::container_internal::Storage&lt;*,*,1&gt;">
<Type Name="absl::lts_20250512::container_internal::Storage&lt;*,*,1&gt;">
<DisplayString IncludeView="noparens">*($T1 *)this</DisplayString>
<DisplayString ExcludeView="noparens">(*($T1 *)this)</DisplayString>
<Expand>
<ExpandedItem>*($T1 *)this</ExpandedItem>
</Expand>
</Type>

<Type Name="absl::lts_20240116::container_internal::map_slot_type&lt;*&gt;">
<Type Name="absl::lts_20250512::container_internal::map_slot_type&lt;*&gt;">
<DisplayString IncludeView="noparens">{value.first}, {value.second}</DisplayString>
<DisplayString ExcludeView="noparens">({value.first}, {value.second})</DisplayString>
<Expand>
20 changes: 3 additions & 17 deletions cmake/external/onnxruntime_external_deps.cmake
@@ -723,36 +723,22 @@ if (onnxruntime_USE_WEBGPU)
)
else()
set(ONNXRUNTIME_Dawn_PATCH_COMMAND
# The dawn.patch contains the following changes:
# The dawn_destroy_buffer_on_destructor.patch contains the following changes:
#
# - (private) Allow WGPUBufferImpl class to destroy the buffer in the destructor
# In native implementation, wgpuBufferRelease will trigger the buffer destroy (if refcount decreased to 0). But
# in emwgpu implementation, the buffer destroy won't happen. This change adds a destructor to the buffer class
# to destroy the buffer when the refcount is 0 for non-external buffers.
#
# - (private) Remove hard-coded CMAKE_OSX_DEPLOYMENT_TARGET in Dawn's CMake files
# https://github.com/microsoft/onnxruntime/pull/23729
#
# - (private) Reduce unsafe buffer usage warning in aligned_storage.h
# https://github.com/microsoft/onnxruntime/pull/24308
# The patch disables the UNSAFE_BUFFER_USAGE warning around the AlignedStorage struct in aligned_storage.h. This is done
# by using TINT_BEGIN_DISABLE_WARNING and TINT_END_DISABLE_WARNING macros, which helps in warnings related to unsafe buffer usage
# usage when compiling the code, making the build process cleaner and faster.
#
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch &&
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_destroy_buffer_on_destructor.patch &&

# The dawn_force_enable_f16_nvidia_vulkan.patch contains the following changes:
#
# - (private) Force enable f16 support for NVIDIA Vulkan
# Dawn disabled f16 support for NVIDIA Vulkan by default because of crashes in f16 CTS tests (crbug.com/tint/2164).
# Since the crashes are limited to specific GPU models, we patched Dawn to remove the restriction.
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_force_enable_f16_nvidia_vulkan.patch &&

# The dawn_fix_copy_dxil_dll.patch contains the following changes:
#
# - (private) Fix copy of dxil.dll in Dawn
# The patch ensures the copy of dxil.dll to be done after the build step of `dxcompiler` target.
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_fix_copy_dxil_dll.patch)
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_force_enable_f16_nvidia_vulkan.patch)

onnxruntime_fetchcontent_declare(
dawn
1 change: 1 addition & 0 deletions cmake/onnxruntime.cmake
@@ -22,6 +22,7 @@ endif()
function(get_c_cxx_api_headers HEADERS_VAR)
set(_headers
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_c_api.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_ep_c_api.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_api.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_inline.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_float16.h"
14 changes: 6 additions & 8 deletions cmake/onnxruntime_common.cmake
@@ -120,16 +120,14 @@ if (onnxruntime_USE_MIMALLOC)
target_link_libraries(onnxruntime_common PRIVATE onnxruntime_mimalloc_shim)
endif()

if(NOT onnxruntime_DISABLE_ABSEIL)
target_include_directories(onnxruntime_common PRIVATE ${ABSEIL_SOURCE_DIR})
if (MSVC)
set(ABSEIL_NATVIS_FILE "abseil-cpp.natvis")
target_sources(
onnxruntime_common
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/external/${ABSEIL_NATVIS_FILE}>)
endif()
if (MSVC)
set(ABSEIL_NATVIS_FILE "abseil-cpp.natvis")
target_sources(
onnxruntime_common
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/external/${ABSEIL_NATVIS_FILE}>)
endif()


if (MSVC)
set(EIGEN_NATVIS_FILE ${eigen_SOURCE_DIR}/debug/msvc/eigen.natvis)
if (EXISTS ${EIGEN_NATVIS_FILE})
3 changes: 1 addition & 2 deletions cmake/onnxruntime_lora.cmake
@@ -10,8 +10,7 @@ file(GLOB onnxruntime_lora_srcs CONFIGURE_DEPENDS
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_lora_srcs})

onnxruntime_add_static_library(onnxruntime_lora ${onnxruntime_lora_srcs})
onnxruntime_add_include_to_target(onnxruntime_lora onnx flatbuffers::flatbuffers Boost::mp11 ${GSL_TARGET})
target_link_libraries(onnxruntime_lora onnxruntime_framework)
onnxruntime_add_include_to_target(onnxruntime_lora onnxruntime_framework onnxruntime_common onnx flatbuffers::flatbuffers Boost::mp11 ${GSL_TARGET})

if(onnxruntime_ENABLE_INSTRUMENT)
target_compile_definitions(onnxruntime_lora PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT)
5 changes: 5 additions & 0 deletions cmake/onnxruntime_providers_cuda.cmake
@@ -264,6 +264,11 @@
if("90" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xptxas=-w>)
target_compile_definitions(${target} PRIVATE COMPILE_HOPPER_TMA_GEMMS)
if (MSVC)
target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /bigobj>")
target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:--diag-suppress=177>")
target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /wd4172>")
endif()
endif()

if (onnxruntime_ENABLE_CUDA_PROFILING) # configure cupti for cuda profiling
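The options added in the hunk above use CMake generator expressions to scope flags to CUDA compilation, with the `SHELL:` prefix keeping `-Xcompiler /bigobj` as two tokens so nvcc forwards `/bigobj` to the MSVC host compiler. A minimal sketch of the pattern (target name `my_cuda_lib` is illustrative):

```cmake
add_library(my_cuda_lib STATIC kernels.cu)

# Apply only when compiling CUDA sources. Without SHELL:, CMake may
# de-duplicate or reorder the two tokens and break the option pair.
target_compile_options(my_cuda_lib PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /bigobj>")

# Single-token nvcc options need no SHELL: prefix.
target_compile_options(my_cuda_lib PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:--diag-suppress=177>")
```

Host-language (C/C++) sources of the same target are unaffected, which is why the conditions are gated on `COMPILE_LANGUAGE:CUDA` rather than applied target-wide.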
1 change: 1 addition & 0 deletions cmake/onnxruntime_python.cmake
@@ -537,6 +537,7 @@ set(onnxruntime_mobile_util_srcs
${REPO_ROOT}/tools/python/util/pytorch_export_helpers.py
${REPO_ROOT}/tools/python/util/reduced_build_config_parser.py
${REPO_ROOT}/tools/python/util/update_onnx_opset.py
${REPO_ROOT}/tools/python/remove_initializer_from_input.py
)
file(GLOB onnxruntime_ort_format_model_srcs CONFIGURE_DEPENDS
${REPO_ROOT}/tools/python/util/ort_format_model/*.py