Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 10 additions & 30 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,30 +43,18 @@ static bool ShouldExportEpContext(const SessionContext& session_context, const S
}

BackendManager::BackendManager(SessionContext& session_context,
SharedContextManager& shared_context_manager,
SharedContext& shared_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger,
EPCtxHandler& ep_ctx_handle) : ep_ctx_handle_(ep_ctx_handle),
session_context_(session_context),
shared_context_manager_(shared_context_manager) {
shared_context_(shared_context) {
subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph);
// If the graph contains an OVIR-wrapped node, we check whether it has a matching XML file name attribute.
subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph,
session_context_.onnx_model_path_name.filename().replace_extension("xml").string());

if (subgraph_context_.is_ep_ctx_graph && !subgraph_context_.is_ep_ctx_ovir_encapsulated) {
shared_context_ = ep_ctx_handle.GetSharedContextForEpContextSubgraph(subgraph,
session_context_.GetModelPath());
} else if (session_context_.so_context_enable && session_context_.so_share_ep_contexts) {
shared_context_ = shared_context_manager_.GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath());
} else {
// Creating a shared context to satisfy backend. It won't be used for weight sharing.
// Don't make it the active share context since we don't actually want to share it.
shared_context_ = shared_context_manager_.GetOrCreateSharedContext(session_context_.GetOutputBinPath());
}
ORT_ENFORCE(shared_context_, "Could not create a shared context.");

subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) {
// return empty if graph has no inputs or if types are not one of FP32/FP16
// else assume the type of the first input
Expand Down Expand Up @@ -138,7 +126,7 @@ BackendManager::BackendManager(SessionContext& session_context,
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
session_context_,
subgraph_context_,
*shared_context_,
shared_context_,
model_stream);
} catch (std::string const& msg) {
ORT_THROW(msg);
Expand All @@ -161,13 +149,13 @@ BackendManager::BackendManager(SessionContext& session_context,
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
session_context_,
subgraph_context_,
*shared_context_,
shared_context_,
model_stream);
}

if (ShouldExportEpContext(session_context_, subgraph_context_)) {
if (concrete_backend_) {
shared_context_->AddNativeBlob(subgraph_context_.subgraph_name, concrete_backend_->GetOVCompiledModel());
shared_context_.AddNativeBlob(subgraph_context_.subgraph_name, concrete_backend_->GetOVCompiledModel());
} else {
ORT_THROW(
"Exporting dynamically compiled models at runtime is not supported. "
Expand All @@ -193,19 +181,11 @@ void BackendManager::TryExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVi
if (session_context_.so_context_embed_mode) { // Internal blob
if (include_embed_data) {
std::stringstream ss;
shared_context_->Serialize(ss);
shared_context_.Serialize(ss);
model_blob_str = std::move(ss).str();
}
} else { // External blob
// Build the name by combining the EpCtx model name (if available) and the subgraph name.
// The model name is not available when creating a session from memory.
auto name = session_context_.so_context_file_path.stem().string();
if (name.empty() && !graph_body_viewer.ModelPath().empty()) {
name = graph_body_viewer.ModelPath().stem().string();
}
ORT_ENFORCE(!name.empty());

model_blob_str = shared_context_->GetBinPath().filename().string();
model_blob_str = shared_context_.GetBinPath().filename().string();
}

auto status = ep_ctx_handle_.AddOVEPCtxNodeToGraph(graph_body_viewer,
Expand Down Expand Up @@ -521,7 +501,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
if ((session_context_.device_type.find("NPU") != std::string::npos) &&
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
std::unique_ptr<onnxruntime::Model> model;
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, *shared_context_);
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
print_model_proto_duration();
Expand Down Expand Up @@ -788,7 +768,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
session_context_,
subgraph_context_,
*shared_context_,
shared_context_,
model_stream);
} catch (const OnnxRuntimeException& ex) {
// Build option disables fallback to CPU on compilation failures with NPU.
Expand All @@ -808,7 +788,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
session_context_,
subgraph_context_,
*shared_context_,
shared_context_,
model_stream);
} catch (std::string const& msg) {
ORT_THROW(msg);
Expand Down
5 changes: 2 additions & 3 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace openvino_ep {
class BackendManager {
public:
BackendManager(SessionContext& session_context,
SharedContextManager& shared_context_manager,
SharedContext& shared_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger,
Expand Down Expand Up @@ -59,8 +59,7 @@ class BackendManager {
SubGraphContext subgraph_context_;
EPCtxHandler& ep_ctx_handle_;
SessionContext& session_context_;
SharedContextManager& shared_context_manager_;
std::shared_ptr<SharedContext> shared_context_;
SharedContext& shared_context_;
};

} // namespace openvino_ep
Expand Down
11 changes: 6 additions & 5 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,12 @@ struct SessionContext : ProviderInfo {
return onnx_model_path_name.empty() ? so_context_file_path : onnx_model_path_name;
}

const std::filesystem::path GetOutputBinPath() const {
std::filesystem::path bin_file_name = so_context_file_path;
if (bin_file_name.empty()) {
bin_file_name = onnx_model_path_name;
}
// Returns so_context_file_path when it is non-empty; otherwise falls back to
// onnx_model_path_name. The result is a reference to the selected member.
const std::filesystem::path& GetOutputModelPath() const {
  if (so_context_file_path.empty()) {
    return onnx_model_path_name;
  }
  return so_context_file_path;
}

std::filesystem::path GetOutputBinPath() const {
const auto& bin_file_name = GetOutputModelPath();
if (bin_file_name.empty()) {
return {};
}
Expand Down
45 changes: 16 additions & 29 deletions onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,29 +93,6 @@
return Status::OK();
}

// Looks up (or creates) the SharedContext associated with an EP-context subgraph.
//
// subgraph_view:   subgraph to inspect; only its first node in topological order is read.
// ep_context_path: path whose parent directory is used to resolve a non-embedded blob.
//
// Returns nullptr when CheckForOVEPCtxNodeInGraph() reports no OVEP context node.
// Otherwise returns whatever the shared context manager yields for the computed path.
// Enforces that the first node exists and carries exactly one EP_CACHE_CONTEXT and one
// EMBED_MODE attribute.
std::shared_ptr<SharedContext> EPCtxHandler::GetSharedContextForEpContextSubgraph(const GraphViewer& subgraph_view, const std::filesystem::path& ep_context_path) const {
  const bool has_ep_ctx_node = CheckForOVEPCtxNodeInGraph(subgraph_view);
  if (!has_ep_ctx_node) {
    return nullptr;
  }

  const auto head_index = *subgraph_view.GetNodesInTopologicalOrder().begin();
  auto node = subgraph_view.GetNode(head_index);
  ORT_ENFORCE(node != nullptr);

  auto& attrs = node->GetAttributes();
  ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) == 1);
  const auto& cache_context_value = attrs.at(EP_CACHE_CONTEXT).s();

  ORT_ENFORCE(attrs.count(EMBED_MODE) == 1);
  const bool is_embedded = static_cast<bool>(attrs.at(EMBED_MODE).i());

  // In embed mode the lookup key stays an empty path; otherwise the attribute value
  // is resolved against the directory containing ep_context_path.
  const std::filesystem::path blob_path =
      is_embedded ? std::filesystem::path{} : ep_context_path.parent_path() / cache_context_value;

  return shared_context_manager_->GetOrCreateSharedContext(blob_path);
}

std::unique_ptr<ModelBlobWrapper> EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const {
auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin();
auto node = graph_viewer.GetNode(first_index);
Expand Down Expand Up @@ -218,10 +195,12 @@
return false;
}

void EPCtxHandler::Initialize(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes, const std::filesystem::path& ep_context_dir) {
std::shared_ptr<SharedContext> EPCtxHandler::Initialize(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes, const SessionContext& session_context) {
bool has_embed_nodes = false;
bool has_non_embed_nodes = false;
bool has_main_context = false;

std::shared_ptr<SharedContext> shared_context{};

Check warning on line 203 in onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <memory> for shared_ptr<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc:203: Add #include <memory> for shared_ptr<> [build/include_what_you_use] [4]
for (const auto& fused_node_graph : fused_nodes) {
const GraphViewer& graph_viewer = fused_node_graph.filtered_graph;

Expand All @@ -241,28 +220,29 @@
if (attrs.count(EMBED_MODE) == 1) {
embed_mode = static_cast<bool>(attrs.at(EMBED_MODE).i());
}
has_embed_nodes |= embed_mode;
has_non_embed_nodes |= !embed_mode;

bool main_context = true;
if (attrs.count(MAIN_CONTEXT) == 1) {
main_context = static_cast<bool>(attrs.at(MAIN_CONTEXT).i());
}

has_main_context |= main_context;
has_embed_nodes |= embed_mode;
has_non_embed_nodes |= !embed_mode;

const std::string& ep_cache_context = attrs.at(EP_CACHE_CONTEXT).s();
if (embed_mode) {
std::filesystem::path dummy_path{};
auto shared_context = shared_context_manager_->GetOrCreateSharedContext(dummy_path);
shared_context = shared_context_manager_->GetOrCreateSharedContext(dummy_path);
if (main_context) {
ORT_ENFORCE(!ep_cache_context.empty(), "Embedded EP context is indicated but EP_CACHE_CONTEXT attribute is empty.");
std::istringstream ss(ep_cache_context);
shared_context->Deserialize(ss);
}
} else {
std::filesystem::path ep_context_path = ep_context_dir / ep_cache_context;
std::filesystem::path ep_context_path = session_context.GetOutputModelPath().parent_path() / ep_cache_context;
if (ep_context_path.extension() != ".xml") {
auto shared_context = shared_context_manager_->GetOrCreateSharedContext(ep_context_path);
shared_context = shared_context_manager_->GetOrCreateSharedContext(ep_context_path);
shared_context->Deserialize();
}
}
Expand All @@ -272,6 +252,13 @@
"Mixed embed and non-embed EP context nodes are not supported in a single model.");
ORT_ENFORCE(!(has_embed_nodes && !has_main_context),
"Expected at least one main context node when embedded EP context nodes are present.");

// No ep context nodes found - create a shared context that can hold native blobs or shared weights.
if (!shared_context) {
shared_context = shared_context_manager_->GetOrCreateActiveSharedContext(session_context.GetOutputBinPath());
}

return shared_context;
}

} // namespace openvino_ep
Expand Down
7 changes: 2 additions & 5 deletions onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@

#include "core/providers/shared_library/provider_api.h"
#include "core/framework/execution_provider.h"
#include "ov_bin_manager.h"
#include "ov_shared_context.h"
#include "contexts.h"

Check warning on line 13 in onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Include the directory when naming header files [build/include_subdir] [4] Raw Output: onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h:13: Include the directory when naming header files [build/include_subdir] [4]

namespace onnxruntime {
namespace openvino_ep {

class SharedBinManager;

struct ModelBlobWrapper {
ModelBlobWrapper(std::unique_ptr<std::istream> stream, const ov::Tensor& tensor) : stream_(std::move(stream)), tensor_(tensor) {}
std::unique_ptr<std::istream> stream_;
Expand All @@ -38,7 +36,6 @@
EPCtxHandler(std::string ov_sdk_version, const logging::Logger& logger, std::shared_ptr<SharedContextManager> shared_context_manager);
EPCtxHandler(const EPCtxHandler&) = delete; // No copy constructor
bool CheckForOVEPCtxNodeInGraph(const GraphViewer& subgraph_view) const;
std::shared_ptr<SharedContext> GetSharedContextForEpContextSubgraph(const GraphViewer& subgraph_view, const std::filesystem::path& ep_context_path) const;
bool CheckForOVEPCtxNode(const Node& node) const;
Status AddOVEPCtxNodeToGraph(const GraphViewer& subgraph_view,
const std::string& graph_name,
Expand All @@ -47,7 +44,7 @@
std::unique_ptr<ModelBlobWrapper> GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& subgraph_view) const;
InlinedVector<const Node*> GetEPCtxNodes() const;
bool CheckEPCacheContextAttribute(const GraphViewer& subgraph_view, const std::string& target_attr_extn) const;
void Initialize(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes, const std::filesystem::path& ep_context_path);
std::shared_ptr<SharedContext> Initialize(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes, const SessionContext& session_context);

Check warning on line 47 in onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <vector> for vector<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h:47: Add #include <vector> for vector<> [build/include_what_you_use] [4]

private:
const std::string openvino_sdk_version_;
Expand Down
17 changes: 5 additions & 12 deletions onnxruntime/core/providers/openvino/openvino_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,22 +110,17 @@ common::Status OpenVINOExecutionProvider::Compile(
std::string("Invalid EP context configuration: ") + kOrtSessionOptionEpContextEmbedMode + " must be 0 if " + kOrtSessionOptionShareEpContexts + " is 1.");
}

bool is_epctx_model = false;
if (!fused_nodes.empty()) {
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
session_context_.onnx_opset_version =
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);

// OVIR wrapped in epctx should be treated as source, but this code does not do so.
// This corner case is not in use and will be addressed in a future commit.
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
}

if (is_epctx_model) {
ep_ctx_handle_.Initialize(fused_nodes, session_context_.GetOutputBinPath().parent_path());
}
shared_context_ = ep_ctx_handle_.Initialize(fused_nodes, session_context_);
ORT_ENFORCE(shared_context_,
"Failed to create or retrieve SharedContext");

struct OpenVINOEPFunctionState {
AllocateFunc allocate_func = nullptr;
Expand All @@ -145,7 +140,7 @@ common::Status OpenVINOExecutionProvider::Compile(
// For original model, check if the user wants to export a model with pre-compiled blob

auto& backend_manager = backend_managers_.emplace_back(session_context_,
*shared_context_manager_,
*shared_context_,
fused_node,
graph_body_viewer,
logger,
Expand Down Expand Up @@ -199,11 +194,9 @@ common::Status OpenVINOExecutionProvider::Compile(

// A bit clunky; ideally we should try to fold this into the EP context handler.
if (!session_context_.so_context_embed_mode) {
auto shared_context = shared_context_manager_->GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath());
shared_context->Serialize();
shared_context_->Serialize();
if (session_context_.so_stop_share_ep_contexts) {
shared_context_manager_->ClearActiveSharedContext();
shared_context->Clear();
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
SessionContext session_context_;
std::shared_ptr<OVCore> ov_core_;
std::shared_ptr<SharedContextManager> shared_context_manager_;
std::shared_ptr<SharedContext> shared_context_;

std::list<BackendManager> backend_managers_; // EP session owns the backend objects
EPCtxHandler ep_ctx_handle_;
Expand Down
Loading
Loading