Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ endif()
function(get_c_cxx_api_headers HEADERS_VAR)
set(_headers
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_c_api.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_ep_c_api.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_api.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_inline.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_ep_c_api.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_ep_device_ep_metadata_keys.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_float16.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_lite_custom_op.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_lite_custom_op.h"
)

if (onnxruntime_ENABLE_TRAINING_APIS)
Expand Down
22 changes: 20 additions & 2 deletions include/onnxruntime/core/session/onnxruntime_ep_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,12 +350,12 @@ struct OrtEp {
uint32_t ort_version_supported;

/** \brief Get the execution provider name.
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEp instance.
* \return The execution provider name.
*
* \note Returned string is owned by ORT and valid until UnregisterExecutionProviderLibrary is called.
*
* \since Version 1.22.
*/
const char*(ORT_API_CALL* GetName)(_In_ const OrtEp* this_ptr);
Expand Down Expand Up @@ -578,6 +578,8 @@ struct OrtEpFactory {
uint32_t ort_version_supported;

/** \brief Get the name of the execution provider that the factory creates.
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \return The name of the execution provider the factory creates.
Expand All @@ -587,6 +589,8 @@ struct OrtEpFactory {
const char*(ORT_API_CALL* GetName)(const OrtEpFactory* this_ptr);

/** \brief Get the name of vendor who owns the execution provider that the factory creates.
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \return The vendor name of the execution provider the factory creates.
Expand Down Expand Up @@ -659,6 +663,20 @@ struct OrtEpFactory {
*/
void(ORT_API_CALL* ReleaseEp)(OrtEpFactory* this_ptr, struct OrtEp* ep);

/** \brief Get the version of the execution provider that the factory creates.
*
* The version string should adhere to the Semantic Versioning 2.0 specification
* (https://github.com/semver/semver/blob/v2.0.0/semver.md).
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \return The execution provider version string.
*
* \since Version 1.23.
*/
const char*(ORT_API_CALL* GetVersion)(_In_ const OrtEpFactory* this_ptr);

/** \brief Create an OrtAllocator for the given OrtMemoryInfo.
*
* This is used to create an allocator that an execution provider requires. The factory that creates the EP is
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

// This file contains well-known keys for OrtEpDevice EP metadata entries.
// It does NOT specify all available metadata keys.

// Key for the execution provider version string. This should be available for all plugin EPs.
// Declared `static` so that every translation unit including this header gets its own
// internal-linkage copy of the pointer.
// NOTE(review): the value stored under this key is presumably the string returned by
// OrtEpFactory::GetVersion (a Semantic Versioning 2.0 string) - confirm against the code that populates it.
static const char* const kOrtEpDevice_EpMetadataKey_Version = "version";
60 changes: 60 additions & 0 deletions onnxruntime/core/common/semver.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/common/semver.h"

#include <regex>

#include "core/common/common.h"
#include "core/common/narrow.h"
#include "core/common/parse_string.h"

namespace onnxruntime {

// Parses `version_string` as a Semantic Versioning 2.0.0 version
// (`major.minor.patch[-prerelease][+build_metadata]`).
//
// On success returns Status::OK() and, if `semver_version_out` is not null, writes the parsed components to it.
// Passing a null `semver_version_out` validates the string only.
//
// The `prerelease` and `build_metadata` fields of the output are views into the character buffer referenced by
// `version_string`. They are only valid for as long as that buffer is alive and unmodified.
//
// Returns a failure Status if `version_string` does not match the SemVer grammar, or if
// ParseStringWithClassicLocale() rejects one of the numeric components.
Status ParseSemVerVersion(std::string_view version_string, SemVerVersion* semver_version_out) {
  // Semantic Versioning version regex was copied from here:
  // https://github.com/semver/semver/blob/d58db1686379c8c6d52e32d42d3a530a964264e5/semver.md?plain=1#L357
  static const std::regex semver_pattern{
      R"(^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$)"};

  // Match over the [data, data + size) range so that `version_string` does not need to be null-terminated.
  std::cmatch match_result{};
  ORT_RETURN_IF_NOT(std::regex_match(version_string.data(), version_string.data() + version_string.size(),
                                     match_result, semver_pattern),
                    "Version string is not in semantic versioning format: '", version_string, "'");

  // Converts a capture group to a view into `version_string`, or nullopt if the group did not participate.
  auto sub_match_to_string_view = [](const std::csub_match& sub_match) -> std::optional<std::string_view> {
    if (!sub_match.matched) {
      return std::nullopt;
    }
    return std::string_view{sub_match.first, narrow<size_t>(sub_match.length())};
  };

  // Parses one of the mandatory numeric capture groups (major, minor, patch) into `component`.
  auto parse_version_component =
      [&sub_match_to_string_view](const std::csub_match& sub_match, uint32_t& component) -> Status {
    const auto component_str = sub_match_to_string_view(sub_match);
    ORT_RETURN_IF_NOT(component_str.has_value(), "sub_match does not match anything.");
    return ParseStringWithClassicLocale(*component_str, component);
  };

  SemVerVersion semver_version{};

  ORT_RETURN_IF_ERROR(parse_version_component(match_result[1], semver_version.major));
  ORT_RETURN_IF_ERROR(parse_version_component(match_result[2], semver_version.minor));
  ORT_RETURN_IF_ERROR(parse_version_component(match_result[3], semver_version.patch));

  // Capture groups 4 and 5 are optional; they stay nullopt when the corresponding part is absent.
  semver_version.prerelease = sub_match_to_string_view(match_result[4]);
  semver_version.build_metadata = sub_match_to_string_view(match_result[5]);

  if (semver_version_out) {
    // SemVerVersion holds only integers and optional string views, so copying is as cheap as moving.
    // A plain copy also avoids depending on <utility> for std::move.
    *semver_version_out = semver_version;
  }
  return Status::OK();
}

// Throwing convenience overload: parses `version_string` and returns the components by value.
// Any failure reported by the Status-returning overload is raised through ORT_THROW_IF_ERROR.
// The optional string views in the returned value refer to `version_string`'s buffer.
SemVerVersion ParseSemVerVersion(std::string_view version_string) {
  SemVerVersion parsed_version{};
  ORT_THROW_IF_ERROR(ParseSemVerVersion(version_string, &parsed_version));
  return parsed_version;
}

} // namespace onnxruntime
32 changes: 32 additions & 0 deletions onnxruntime/core/common/semver.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <optional>
#include <string_view>

#include "core/common/status.h"

namespace onnxruntime {

// Semantic Versioning version utilities.
// See https://github.com/semver/semver/blob/v2.0.0/semver.md.

// Components of a parsed Semantic Versioning version
// (`major.minor.patch[-prerelease][+build_metadata]`).
//
// The two optional fields are non-owning string views; when filled in by ParseSemVerVersion() they
// refer to the input string's buffer and must not outlive it.
struct SemVerVersion {
  // Mandatory numeric components; all default to 0.
  uint32_t major = 0;
  uint32_t minor = 0;
  uint32_t patch = 0;
  // Pre-release identifiers (the text following '-'), or nullopt if absent.
  std::optional<std::string_view> prerelease = std::nullopt;
  // Build metadata (the text following '+'), or nullopt if absent.
  std::optional<std::string_view> build_metadata = std::nullopt;
};

// Parse a Semantic Versioning version from `version_string`.
// If provided, the parsed version components will be written to `semver_version`.
// `semver_version` may be null, in which case `version_string` is only validated.
// Returns a failure Status if `version_string` is not in Semantic Versioning format.
// Note: the optional string_view fields written to `semver_version` refer to the buffer behind
// `version_string`, so that buffer must outlive any use of them.
Status ParseSemVerVersion(std::string_view version_string, SemVerVersion* semver_version);

// Parse a Semantic Versioning version from `version_string`.
// Throws if parsing fails. The optional string_view fields of the returned value refer to the
// buffer behind `version_string`.
SemVerVersion ParseSemVerVersion(std::string_view version_string);

} // namespace onnxruntime
23 changes: 21 additions & 2 deletions onnxruntime/core/graph/graph_proto_serializer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ void GraphViewerToProto(const GraphViewer& graph_view,
ONNX_NAMESPACE::GraphProto& graph_proto,
bool include_initializer,
bool include_outer_scope_args,
ExecutionOrder order) {
ExecutionOrder order,
bool include_initializer_data) {
graph_proto.set_name(graph_view.Name());
graph_proto.set_doc_string(graph_view.Description());

Expand Down Expand Up @@ -92,7 +93,25 @@ void GraphViewerToProto(const GraphViewer& graph_view,
const auto& [name, init] = *it;
current_scope_initializer_set.insert(name);
auto* p_initializer = graph_proto.add_initializer();
ORT_THROW_IF_ERROR(get_initializer_with_data(*init, *p_initializer));

// Do not save raw or external data into the graph, only the metadata
if (!include_initializer_data && (init->has_raw_data() || init->has_data_location())) {
// Set datatype
if (init->has_data_type()) {
p_initializer->set_data_type(init->data_type());
}
// Set name
if (init->has_name()) {
p_initializer->set_name(init->name());
}

// Set dims
for (int i = 0; i < init->dims_size(); ++i) {
p_initializer->add_dims(init->dims()[i]);
}
} else {
ORT_THROW_IF_ERROR(get_initializer_with_data(*init, *p_initializer));
}
}

// handle outer scope value which is a constant initializer
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/graph/graph_proto_serializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ void GraphViewerToProto(const GraphViewer& graph_view,
ONNX_NAMESPACE::GraphProto& graph_proto,
bool include_initializer,
bool include_outer_scope_args,
ExecutionOrder order = ExecutionOrder::DEFAULT);
ExecutionOrder order = ExecutionOrder::DEFAULT,
bool include_initializer_data = true);
} // namespace onnxruntime
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -308,12 +308,14 @@
} // namespace onnxruntime

#include "core/framework/error_code_helper.h"
#include "onnxruntime_config.h" // for ORT_VERSION

Check notice on line 311 in onnxruntime/core/providers/cuda/cuda_provider_factory.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/cuda/cuda_provider_factory.cc#L311

Include the directory when naming header files [build/include_subdir] [4]
Raw output
onnxruntime/core/providers/cuda/cuda_provider_factory.cc:311:  Include the directory when naming header files  [build/include_subdir] [4]

// OrtEpApi infrastructure to be able to use the CUDA EP as an OrtEpFactory for auto EP selection.
struct CudaEpFactory : OrtEpFactory {
CudaEpFactory(const OrtApi& ort_api_in) : ort_api{ort_api_in} {
GetName = GetNameImpl;
GetVendor = GetVendorImpl;
GetVersion = GetVersionImpl;
GetSupportedDevices = GetSupportedDevicesImpl;
CreateEp = CreateEpImpl;
ReleaseEp = ReleaseEpImpl;
Expand All @@ -329,6 +331,10 @@
return factory->vendor.c_str();
}

// OrtEpFactory::GetVersion implementation for the CUDA EP factory.
// Returns ORT_VERSION (from onnxruntime_config.h); the factory instance is not consulted.
static const char* ORT_API_CALL GetVersionImpl(const OrtEpFactory* /*this_ptr*/) noexcept {
return ORT_VERSION;
}

static OrtStatus* GetSupportedDevicesImpl(OrtEpFactory* this_ptr,
const OrtHardwareDevice* const* devices,
size_t num_devices,
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/qnn/qnn_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
}

#include "core/framework/error_code_helper.h"
#include "onnxruntime_config.h" // for ORT_VERSION

Check notice on line 119 in onnxruntime/core/providers/qnn/qnn_provider_factory.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/qnn/qnn_provider_factory.cc#L119

Include the directory when naming header files [build/include_subdir] [4]
Raw output
onnxruntime/core/providers/qnn/qnn_provider_factory.cc:119:  Include the directory when naming header files  [build/include_subdir] [4]

// OrtEpApi infrastructure to be able to use the QNN EP as an OrtEpFactory for auto EP selection.
struct QnnEpFactory : OrtEpFactory {
Expand All @@ -126,6 +127,7 @@
: ort_api{ort_api_in}, ep_name{ep_name}, ort_hw_device_type{hw_type}, qnn_backend_type{qnn_backend_type} {
GetName = GetNameImpl;
GetVendor = GetVendorImpl;
GetVersion = GetVersionImpl;
GetSupportedDevices = GetSupportedDevicesImpl;
CreateEp = CreateEpImpl;
ReleaseEp = ReleaseEpImpl;
Expand All @@ -143,6 +145,10 @@
return factory->vendor.c_str();
}

// OrtEpFactory::GetVersion implementation for the QNN EP factory.
// Returns ORT_VERSION (from onnxruntime_config.h); the factory instance is not consulted.
static const char* ORT_API_CALL GetVersionImpl(const OrtEpFactory* /*this_ptr*/) noexcept {
return ORT_VERSION;
}

// Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports.
// An EP created with this factory is expected to be able to execute a model with *all* supported
// hardware devices at once. A single instance of QNN EP is not currently setup to partition a model among
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,8 @@ struct ProviderHost {
ONNX_NAMESPACE::GraphProto& graph_proto,
bool include_initializers,
bool include_outer_scope_args,
int execution_order) noexcept = 0;
int execution_order,
bool include_initializer_data) noexcept = 0;
virtual const Node* GraphViewer__GetProducerNode(const GraphViewer* p, const std::string& node_arg_name) const = 0;
virtual IOnnxRuntimeOpSchemaCollectionPtr GraphViewer__GetSchemaRegistry(const GraphViewer* p) const = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1150,8 +1150,9 @@ class GraphViewer final {
void ToProto(ONNX_NAMESPACE::GraphProto& graph_proto,
bool include_initializers,
bool include_outer_scope_args,
int execution_order = 0) const {
g_host->GraphViewer__ToProto(this, graph_proto, include_initializers, include_outer_scope_args, execution_order);
int execution_order = 0,
bool include_initializer_data = true) const {
g_host->GraphViewer__ToProto(this, graph_proto, include_initializers, include_outer_scope_args, execution_order, include_initializer_data);
}
const Node* GetProducerNode(const std::string& node_arg_name) const { return g_host->GraphViewer__GetProducerNode(this, node_arg_name); }
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const { return g_host->GraphViewer__GetSchemaRegistry(this); }
Expand Down
13 changes: 6 additions & 7 deletions onnxruntime/core/providers/webgpu/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "core/framework/session_state.h"
#include "core/providers/webgpu/allocator.h"
#include "core/providers/webgpu/webgpu_context.h"
#include "core/providers/webgpu/buffer_manager.h"

namespace onnxruntime {
namespace webgpu {
Expand All @@ -15,18 +15,17 @@ void* GpuBufferAllocator::Alloc(size_t size) {

stats_.num_allocs++;

#if !defined(__wasm__)
if (!session_initialized_ && context_.DeviceHasFeature(wgpu::FeatureName::BufferMapExtendedUsages)) {
return context_.BufferManager().CreateUMA(size);
// Check if the buffer manager supports UMA and we're not yet in an initialized session
if (!session_initialized_ && buffer_manager_.SupportsUMA()) {
return buffer_manager_.CreateUMA(size);
}
#endif // !defined(__wasm__)

return context_.BufferManager().Create(size);
return buffer_manager_.Create(size);
}

void GpuBufferAllocator::Free(void* p) {
if (p != nullptr) {
context_.BufferManager().Release(static_cast<WGPUBuffer>(p));
buffer_manager_.Release(static_cast<WGPUBuffer>(p));
stats_.num_allocs--;
}
}
Expand Down
9 changes: 4 additions & 5 deletions onnxruntime/core/providers/webgpu/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,26 @@
namespace onnxruntime {
namespace webgpu {

class WebGpuContext;
class BufferManager;

class GpuBufferAllocator : public IAllocator {
public:
GpuBufferAllocator(const WebGpuContext& context)
GpuBufferAllocator(const BufferManager& buffer_manager)

Check notice on line 16 in onnxruntime/core/providers/webgpu/allocator.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/webgpu/allocator.h#L16

Single-parameter constructors should be marked explicit. [runtime/explicit] [4]
Raw output
onnxruntime/core/providers/webgpu/allocator.h:16:  Single-parameter constructors should be marked explicit.  [runtime/explicit] [4]
: IAllocator(
OrtMemoryInfo(WEBGPU_BUFFER, OrtAllocatorType::OrtDeviceAllocator,
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NONE, 0),
OrtMemTypeDefault)),
context_{context} {
buffer_manager_{buffer_manager} {
}

virtual void* Alloc(size_t size) override;
virtual void Free(void* p) override;
void GetStats(AllocatorStats* stats) override;

void OnSessionInitializationEnd();

private:
AllocatorStats stats_;
const WebGpuContext& context_;
const BufferManager& buffer_manager_;
bool session_initialized_ = false;
};

Expand Down
Loading
Loading