Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
1851b73
[build] fix packaging pipeline for arm64/linux (#26592)
fs-eire Nov 19, 2025
e6023b0
Create a CUDA based memory arena instead of Cuda Allocator wrapped in…
yuslepukhin Nov 19, 2025
81a04ca
[webgpu] Fix the wrong fallback in Attention (#26608)
qjia7 Nov 20, 2025
607d5e4
[WebGPU] Implement Split-K on Conv|MatMul (#26461)
Jiawei-Shao Nov 20, 2025
4dbb05f
[core] allow using initializer allocator for prepack (#26617)
fs-eire Nov 20, 2025
1d2a434
[AIX]Blocking the call of dladdr under _AIX (#26513)
ranjitshs Nov 21, 2025
ee0ffd5
[WebNN] Update unit tests list (#26566)
Honry Nov 21, 2025
ff0715d
Adding candidate metadata key for tracking EP's OS driver version (#2…
adrastogi Nov 21, 2025
bdf8dc2
[WebNN EP] Support local attention feature for GQA (#26565)
peishenyan Nov 21, 2025
96926a0
[webgpu] Fused CopyKVCache and SplitPackedQKVWithRotaryEmbedding as S…
xiaofeihan1 Nov 21, 2025
4665804
Udpate MS Wil dependency for FETCH_CONTENT to the latest (#26623)
yuslepukhin Nov 21, 2025
977efe4
[webgpu] Throw errors for graph catpure when not implemented (#26604)
qjia7 Nov 21, 2025
4870d45
Add int8 support to ConvInteger (#26585)
rivkastroh Nov 24, 2025
5834bfe
Add API to access config entries from KernelInfo (#26589)
zpye Nov 24, 2025
e8bcd0d
[QNN EP] Fuse Gelu pattern into a QNN Gelu Node (#26417)
quic-tirupath Nov 24, 2025
8e951ef
Update weight sharing tool to support plugin EPs (#26614)
adrianlizarraga Nov 25, 2025
e6e048e
KFI-203 Improve thread safety of packing in convolve_kleidiai.cpp (#2…
Colm-in-Arm Nov 25, 2025
6274e3c
Merge branch 'master' into sync_msft_25112025
Jaswanth51 Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ googletest;https://github.com/google/googletest/archive/refs/tags/v1.17.0.zip;f6
googlexnnpack;https://github.com/google/XNNPACK/archive/3cf85e705098622d59056dcb8f5f963ea7bb0a00.zip;6f6bbba627241f89463ca845febaf063982b34fe
json;https://github.com/nlohmann/json/archive/refs/tags/v3.11.3.zip;5e88795165cc8590138d1f47ce94ee567b85b4d6
microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.250325.1.zip;826c8bd47c2258ec61b8b218e031e5b33d27f761
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.19.1.zip;c5215b5697dcdfd71799f001b8c4054a6bba6b09
Expand Down
10 changes: 8 additions & 2 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1528,8 +1528,14 @@ endif()
onnxruntime_add_shared_library(onnxruntime_runtime_path_test_shared_library
${onnxruntime_runtime_path_test_shared_library_src})

target_link_libraries(onnxruntime_runtime_path_test_shared_library PRIVATE
onnxruntime_common cpuinfo ${CMAKE_DL_LIBS})
if (CMAKE_SYSTEM_NAME MATCHES "AIX")
target_link_libraries(onnxruntime_runtime_path_test_shared_library PRIVATE
onnxruntime_common ${CMAKE_DL_LIBS})
set_target_properties(onnxruntime_runtime_path_test_shared_library PROPERTIES AIX_SHARED_LIBRARY_ARCHIVE OFF)
else()
target_link_libraries(onnxruntime_runtime_path_test_shared_library PRIVATE
onnxruntime_common cpuinfo ${CMAKE_DL_LIBS})
endif()
target_include_directories(onnxruntime_runtime_path_test_shared_library PRIVATE ${ONNXRUNTIME_ROOT})

if(UNIX)
Expand Down
2 changes: 1 addition & 1 deletion docs/OperatorKernels.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ Do not modify directly.*
|Conv|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *out* Y:**T**|22+|**T** = tensor(float)|
|||[11, 21]|**T** = tensor(float)|
|||[1, 10]|**T** = tensor(float)|
|ConvInteger|*in* x:**T1**<br> *in* w:**T2**<br> *in* x_zero_point:**T1**<br> *in* w_zero_point:**T2**<br> *out* y:**T3**|10+|**T1** = tensor(uint8)<br/> **T2** = tensor(uint8)<br/> **T3** = tensor(int32)|
|ConvInteger|*in* x:**T1**<br> *in* w:**T2**<br> *in* x_zero_point:**T1**<br> *in* w_zero_point:**T2**<br> *out* y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)<br/> **T2** = tensor(int8), tensor(uint8)<br/> **T3** = tensor(int32)|
|ConvTranspose|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *out* Y:**T**|22+|**T** = tensor(float)|
|||[11, 21]|**T** = tensor(float)|
|||[1, 10]|**T** = tensor(float)|
Expand Down
19 changes: 19 additions & 0 deletions include/onnxruntime/core/framework/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ struct OrtArenaCfg {
int max_dead_bytes_per_chunk; // use -1 to allow ORT to choose the default
int initial_growth_chunk_size_bytes; // use -1 to allow ORT to choose the default
int64_t max_power_of_two_extend_bytes; // use -1 to allow ORT to choose the default
// Use CudaMemPool based arena if available (starting with cuda 11.2)
int use_cuda_mempool = -1;
// Amount of reserved memory in bytes to hold onto before trying
// to release memory back to the OS.
uint64_t cuda_mempool_release_threshold = 0;
// Bytes to keep on shrink for CudaMemPool, 0 is to attempt to release all, allocated space not affected.
size_t cuda_mempool_bytes_to_keep_on_shrink = 0;

bool IsValid() {
return arena_extend_strategy >= -1 && arena_extend_strategy <= 1 &&
Expand All @@ -55,6 +62,9 @@ struct OrtArenaCfg {
static constexpr const char* InitialGrowthChunkSizeBytes = "arena.initial_growth_chunk_size_bytes";
static constexpr const char* MaxPowerOfTwoExtendBytes = "arena.max_power_of_two_extend_bytes";
static constexpr const char* MaxMem = "arena.max_mem";
static constexpr const char* UseCudaMemPool = "arena.use_cuda_mempool";
static constexpr const char* CudaMempoolReleaseThreshold = "arena.cuda_mempool_release_threshold";
static constexpr const char* CudaMempoolBytesToKeepOnShrink = "arena.cuda_mempool_bytes_to_keep_on_shrink";
};

static onnxruntime::common::Status FromKeyValuePairs(const OrtKeyValuePairs& kvps, OrtArenaCfg& cfg);
Expand Down Expand Up @@ -348,4 +358,13 @@ void AllocatorDefaultFree(void* p);
void* AllocatorDefaultAllocAligned(size_t size, size_t alignment);
void AllocatorDefaultFreeAligned(void* p, size_t alignment);

// Allocator interface that extends IAllocator with an explicit Shrink()
// operation and an optional per-stream buffer release hook.
class IArena : public IAllocator {
public:
// Re-use IAllocator's constructors unchanged.
using IAllocator::IAllocator;
// Pure virtual: every concrete arena must implement Shrink() and report the
// outcome as a Status.
// NOTE(review): presumably returns unused memory held by the arena -- confirm
// against the concrete implementations.
virtual Status Shrink() = 0;
// Only implemented when IsStreamAware() returns true
// The default implementation here does nothing.
virtual void ReleaseStreamBuffers(Stream* /*stream*/) {}
// Static helper taking an IAllocator* and yielding an IArena*; only declared here.
// NOTE(review): presumably yields nullptr when `allocator` is not an arena --
// confirm against the definition.
static IArena* SafeArenaCast(IAllocator* allocator);
};

} // namespace onnxruntime
17 changes: 17 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -6591,6 +6591,23 @@ struct OrtApi {
* \since Version 1.24
*/
ORT_API_T(bool, TensorTypeAndShape_HasShape, _In_ const OrtTensorTypeAndShapeInfo* info);

/** \brief Get all config entries from ::OrtKernelInfo.
*
* Gets all configuration entries from the ::OrtKernelInfo object as key-value pairs.
* Config entries are set on the ::OrtSessionOptions and are accessible in custom operator kernels.
*
* Used in the CreateKernel callback of an OrtCustomOp to access all session configuration entries
* during kernel construction.
*
* \param[in] info An instance of ::OrtKernelInfo.
* \param[out] out A pointer to a newly created OrtKeyValuePairs instance containing all config entries.
* Note: the user should call OrtApi::ReleaseKeyValuePairs.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
* \since Version 1.24
*/
ORT_API2_STATUS(KernelInfo_GetConfigEntries, _In_ const OrtKernelInfo* info, _Outptr_ OrtKeyValuePairs** out);
};

/*
Expand Down
2 changes: 2 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -2768,6 +2768,8 @@ struct KernelInfoImpl : Base<T> {

std::string GetNodeName() const;
Logger GetLogger() const;

KeyValuePairs GetConfigEntries() const;
};

} // namespace detail
Expand Down
7 changes: 7 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -2822,6 +2822,13 @@ inline Logger KernelInfoImpl<T>::GetLogger() const {
return Logger{out};
}

// Retrieves all config entries for this kernel info through the C API entry
// point KernelInfo_GetConfigEntries. The resulting status is passed to
// Ort::ThrowOnError, and the returned pointer is wrapped in a KeyValuePairs.
template <typename T>
inline KeyValuePairs KernelInfoImpl<T>::GetConfigEntries() const {
  OrtKeyValuePairs* entries = nullptr;
  auto* status = GetApi().KernelInfo_GetConfigEntries(this->p_, &entries);
  Ort::ThrowOnError(status);
  return KeyValuePairs{entries};
}

// Reads the float attribute `name` from kernel info `p` into `out` via the
// C API; the resulting status is passed to Ort::ThrowOnError.
inline void attr_utils::GetAttr(const OrtKernelInfo* p, const char* name, float& out) {
  auto* status = GetApi().KernelInfoGetAttribute_float(p, name, &out);
  Ort::ThrowOnError(status);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
// Key for the execution provider version string. This should be available for all plugin EPs.
static const char* const kOrtEpDevice_EpMetadataKey_Version = "version";

// Key for the execution provider OS driver version.
static const char* const kOrtEpDevice_EpMetadataKey_OSDriverVersion = "os_driver_version";

// Prefix for execution provider compatibility information stored in model metadata.
// Used when generating EP context models to store compatibility strings for each EP.
// Full key format: "ep_compatibility_info.<EP_TYPE>"
Expand Down
Loading
Loading