78 changes: 59 additions & 19 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -20,11 +20,11 @@
namespace onnxruntime {
namespace openvino_ep {

-SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary) {
+SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary), file_path_(filename) {
   try {
     file_.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-    weights_size_ = file_.seekg(0, std::ios::end).tellg();
-  } catch (std::ifstream::failure& e) {
+    weights_size_ = std::filesystem::file_size(filename);
+  } catch (const std::exception& e) {
     ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", e.what());
   }
 }
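Worth noting in the hunk above: sizing the file with std::filesystem::file_size replaces the seekg/tellg round-trip, which is also why the catch clause widens from std::ifstream::failure to const std::exception& (file_size reports errors via a thrown std::filesystem::filesystem_error rather than stream failbits). A standalone sketch of the two approaches, with a hypothetical file name:

```cpp
#include <filesystem>
#include <fstream>
#include <iostream>

int main() {
  const std::filesystem::path p{"weights.bin"};  // hypothetical weights file

  // Old approach: open a stream, seek to the end, read the position back.
  // Mutates stream state and reports errors through failbit/badbit.
  std::ifstream f(p, std::ios::in | std::ios::binary);
  const auto size_via_stream = f.seekg(0, std::ios::end).tellg();

  // New approach: ask the filesystem directly; no stream involved, and
  // failure surfaces as a thrown std::filesystem::filesystem_error.
  const auto size_via_fs = std::filesystem::file_size(p);

  std::cout << size_via_stream << " vs " << size_via_fs << '\n';
}
```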
@@ -35,6 +35,32 @@
file_.read(reinterpret_cast<char*>(data), size);
}

+void* SharedContext::SharedWeights::WeightsFile::TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context) {
+  std::string dev_name{};
+  if (remote_context) {
+    dev_name = remote_context->get_device_name();
+  }
+
+  auto [it, inserted] = imported_device_tensors_.emplace(dev_name, MappingContainer{});
+  if (inserted) {
+    if (dev_name == "NPU") {
+#if OPENVINO_VERSION_AT_LEAST(2025, 3)
+      // try to import the memory mapped file to remote tensor
+      ORT_ENFORCE(remote_context, "Error: Remote context is required for NPU device.");
+      auto npu_context = remote_context->as<ov::intel_npu::level_zero::ZeroContext>();
+      auto&& l0_tensor = npu_context.create_tensor(ov::element::Type_t::u8, {weights_size_}, ov::intel_npu::FileDescriptor(file_path_));
+      it->second = MappingContainer{.ptr_ = l0_tensor.get(), .tensor_ = l0_tensor};
+#endif
+    } else if (dev_name.empty()) {
+      // CPU/virtual device case, create a CPU tensor memory mapped from file
+      auto&& mmaped_tensor = ov::read_tensor_data(file_path_);
+      it->second = MappingContainer{.ptr_ = mmaped_tensor.data(), .tensor_ = mmaped_tensor};
Comment on lines +55 to +57 (Copilot AI, Oct 22, 2025):
Corrected spelling of 'mmaped' to 'mmapped' in variable name and comment.

Suggested change:
-      // CPU/virtual device case, create a CPU tensor memory mapped from file
-      auto&& mmaped_tensor = ov::read_tensor_data(file_path_);
-      it->second = MappingContainer{.ptr_ = mmaped_tensor.data(), .tensor_ = mmaped_tensor};
+      // CPU/virtual device case, create a CPU tensor memory mmapped from file
+      auto&& mmapped_tensor = ov::read_tensor_data(file_path_);
+      it->second = MappingContainer{.ptr_ = mmapped_tensor.data(), .tensor_ = mmapped_tensor};
+    }
+  }
+
+  return it->second.ptr_;
+}
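The function above caches one mapping per device name: std::map::emplace returns an {iterator, inserted} pair, so the expensive import runs only on the first call for a given device, and every later call returns the cached pointer, including a cached nullptr for devices where no mapping could be created. A self-contained sketch of that idiom (names are illustrative, not from the PR):

```cpp
#include <iostream>
#include <map>
#include <string>

// Stand-in for the real MappingContainer (raw pointer plus owning tensor).
struct Mapping {
  void* ptr{nullptr};
};

std::map<std::string, Mapping> cache;

void* GetOrCreate(const std::string& dev_name) {
  // emplace inserts only if the key is new; `inserted` tells us which case.
  auto [it, inserted] = cache.emplace(dev_name, Mapping{});
  if (inserted) {
    // First request for this device: do the expensive setup exactly once.
    // (The real code mmaps the file or imports it into a device tensor.)
    std::cout << "creating mapping for '" << dev_name << "'\n";
  }
  return it->second.ptr;  // cached on every later call, even if nullptr
}

int main() {
  GetOrCreate("NPU");  // prints once
  GetOrCreate("NPU");  // silent cache hit
  GetOrCreate("");     // CPU/virtual-device key
}
```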

std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeights::Metadata::Map& metadata) {
try {
stream << metadata.size();
@@ -405,29 +431,43 @@
void CreateOVTensors(const std::string& device_name,
SharedContext::SharedWeights::Metadata::Map& metadata_map,
SharedContext::SharedWeights::WeightsFile& weights) {
+  // Get remote context if available
+  std::optional<ov::RemoteContext> opt_remote_ctx;
+  try {
+    opt_remote_ctx = OVCore::Get()->core.get_default_context(device_name);
+  } catch (const std::exception&) {
+    // Remote context not available
+  }

for (auto& [key, value] : metadata_map) {
if (value.tensor) continue;

// Get element data type
auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;

-    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);  // Map to OpenVINO data type
-
-    // Create OpenVINO Tensor
-    if (device_name == "NPU") {
-      // Use remote tensors
-      auto npu_context = OVCore::Get()->core.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
-      auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);
-
-      // Copy data to remote tensor
-      weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
-      value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
+    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);
+
+    // Try to get memory-mapped weights
+    ov::Tensor tensor;
+    uint8_t* mmaped_weights = static_cast<uint8_t*>(weights.TryGetOrCreateDeviceMapping(opt_remote_ctx));
+
+    if (mmaped_weights) {
+      // We have memory mapped weights. Create a Tensor view into it for this value.
+      ORT_ENFORCE(value.data_offset < weights.Size() &&
+                      value.size <= weights.Size() &&
+                      (value.data_offset <= weights.Size() - value.size),
+                  "File offset + size outside of external initializer file");
+      void* mmapped_offset = static_cast<void*>(mmaped_weights + value.data_offset);
Comment on lines +451 to +459 (Copilot AI, Oct 22, 2025):
Corrected spelling of 'mmaped' to 'mmapped' in variable name (note: mmapped_offset is already correct).

Suggested change:
-    uint8_t* mmaped_weights = static_cast<uint8_t*>(weights.TryGetOrCreateDeviceMapping(opt_remote_ctx));
-    if (mmaped_weights) {
-      // We have memory mapped weights. Create a Tensor view into it for this value.
-      ORT_ENFORCE(value.data_offset < weights.Size() &&
-                      value.size <= weights.Size() &&
-                      (value.data_offset <= weights.Size() - value.size),
-                  "File offset + size outside of external initializer file");
-      void* mmapped_offset = static_cast<void*>(mmaped_weights + value.data_offset);
+    uint8_t* mmapped_weights = static_cast<uint8_t*>(weights.TryGetOrCreateDeviceMapping(opt_remote_ctx));
+    if (mmapped_weights) {
+      // We have memory mapped weights. Create a Tensor view into it for this value.
+      ORT_ENFORCE(value.data_offset < weights.Size() &&
+                      value.size <= weights.Size() &&
+                      (value.data_offset <= weights.Size() - value.size),
+                  "File offset + size outside of external initializer file");
+      void* mmapped_offset = static_cast<void*>(mmapped_weights + value.data_offset);
+      tensor = ov::Tensor(ov_elementType, value.dimensions, mmapped_offset);
     } else {
-      // Use vanilla tensors
-      value.tensor = std::make_shared<ov::Tensor>(ov_elementType, value.dimensions);
-      weights.load_weights(value.data_offset, value.tensor->data(), value.size);
+      ORT_ENFORCE(opt_remote_ctx, "Expected either memory-mapped weights or a valid remote context, but neither is available for device: ", device_name);
+      // Can't mmap the file to device tensor, create a host tensor and copy the data
+      tensor = opt_remote_ctx->create_host_tensor(ov_elementType, value.dimensions);
+      ORT_ENFORCE(tensor.get_byte_size() == value.size, "Remote tensor size mismatch");
+      weights.load_weights(value.data_offset, tensor.data(), value.size);
     }
-    ORT_ENFORCE(value.tensor->get_byte_size() == value.size, "Unexpected tensor size mismatch");
+
+    ORT_ENFORCE(tensor.get_byte_size() == value.size, "Unexpected tensor size mismatch");
+    value.tensor = std::make_shared<ov::Tensor>(std::move(tensor));

[cpplint] warning (GitHub Actions / Optional Lint C++) on line 470 of onnxruntime/core/providers/openvino/backend_utils.cc: Add #include <memory> for make_shared<> [build/include_what_you_use] [4]
}
}
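Two details in the new fast path deserve a closer look: the bounds check is written in overflow-safe form (value.data_offset <= weights.Size() - value.size instead of a raw offset + size comparison that could wrap), and ov::Tensor is constructed over a pre-allocated host pointer, so each initializer becomes a zero-copy view into the shared mapping rather than an owned allocation plus a file read. A hedged sketch of that pattern; the helper name and signature are illustrative, not part of the PR:

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>

#include <openvino/runtime/tensor.hpp>

// Wrap the bytes at [offset, offset + size) of an already-mapped weights
// buffer in an ov::Tensor without copying.
ov::Tensor ViewIntoMapping(uint8_t* base, size_t total_size,
                           size_t offset, size_t size,
                           ov::element::Type type, const ov::Shape& shape) {
  // Overflow-safe form of "offset + size <= total_size".
  if (!(offset < total_size && size <= total_size &&
        offset <= total_size - size)) {
    throw std::out_of_range("initializer lies outside the mapped weights file");
  }
  // The host-pointer constructor makes the tensor a non-owning view, so the
  // underlying mapping must outlive every tensor created from it.
  return ov::Tensor(type, shape, static_cast<void*>(base + offset));
}
```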

8 changes: 8 additions & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -55,10 +55,18 @@ class SharedContext : public WeakSingleton<SharedContext> {
   explicit WeightsFile(std::filesystem::path filename);

   void load_weights(size_t file_offset, void* data, size_t size);
+  void* TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context);
   size_t Size() const { return weights_size_; }

  private:
   std::ifstream file_;
+  std::filesystem::path file_path_;
   size_t weights_size_;
+  struct MappingContainer {
+    void* ptr_{nullptr};
+    ov::Tensor tensor_;
+  };
+  std::map<std::string, MappingContainer> imported_device_tensors_;
 };

void clear() {
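A note on the new members: MappingContainer pairs the raw ptr_ with the ov::Tensor it came from because ov::Tensor shares ownership of its underlying buffer, so holding the tensor in imported_device_tensors_ is what keeps the mmap (or level-zero import) alive while the cached pointer is handed out. A small sketch of that ownership relationship, assuming only APIs already used in this diff (the path is hypothetical):

```cpp
#include <map>
#include <string>

#include <openvino/openvino.hpp>

struct MappingContainer {
  void* ptr_{nullptr};  // raw view, valid only while tensor_ is alive
  ov::Tensor tensor_;   // shares ownership of the underlying buffer
};

int main() {
  std::map<std::string, MappingContainer> imported;

  // ov::read_tensor_data (used in this PR) memory-maps a file into a tensor.
  ov::Tensor t = ov::read_tensor_data("weights.bin");
  imported.emplace("", MappingContainer{.ptr_ = t.data(), .tensor_ = t});

  // Even after `t` goes out of scope, the copy stored in the map keeps the
  // mapping alive, so the cached ptr_ stays valid.
}
```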
11 changes: 11 additions & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
@@ -21,6 +21,17 @@

#include <string>

+// Helper macro to test OpenVINO version at compile time.
+// Usage: #if OPENVINO_VERSION_AT_LEAST(2025, 3)
+// Falls back to 0 if OPENVINO_VERSION_MAJOR/MINOR are not defined.
+#if defined(OPENVINO_VERSION_MAJOR) && defined(OPENVINO_VERSION_MINOR)
+#define OPENVINO_VERSION_AT_LEAST(major, minor) \
+  ((OPENVINO_VERSION_MAJOR > (major)) ||        \
+   (OPENVINO_VERSION_MAJOR == (major) && OPENVINO_VERSION_MINOR >= (minor)))
+#else
+#define OPENVINO_VERSION_AT_LEAST(major, minor) 0
+#endif

namespace onnxruntime {
namespace openvino_ep {
class OVCore;
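Finally, a quick illustration of how the version gate behaves. The stand-in version defines exist only so the sketch compiles outside an OpenVINO build, where the real values come from the toolkit headers:

```cpp
#include <cstdio>

// Stand-ins for this sketch; the real macros come from OpenVINO's headers.
#define OPENVINO_VERSION_MAJOR 2025
#define OPENVINO_VERSION_MINOR 3

#define OPENVINO_VERSION_AT_LEAST(major, minor) \
  ((OPENVINO_VERSION_MAJOR > (major)) ||        \
   (OPENVINO_VERSION_MAJOR == (major) && OPENVINO_VERSION_MINOR >= (minor)))

int main() {
#if OPENVINO_VERSION_AT_LEAST(2025, 3)
  std::puts("2025.3+ path: e.g. ov::intel_npu::FileDescriptor import");
#else
  std::puts("fallback path for older toolkits");
#endif
}
```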