diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index eed08ee673e49..3426a2781bbc6 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -43,30 +43,18 @@ static bool ShouldExportEpContext(const SessionContext& session_context, const S } BackendManager::BackendManager(SessionContext& session_context, - SharedContextManager& shared_context_manager, + SharedContext& shared_context, const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger, EPCtxHandler& ep_ctx_handle) : ep_ctx_handle_(ep_ctx_handle), session_context_(session_context), - shared_context_manager_(shared_context_manager) { + shared_context_(shared_context) { subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph); // If the graph contains a OVIR wrapped node, we check if it has matching xml file name attribute subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph, session_context_.onnx_model_path_name.filename().replace_extension("xml").string()); - if (subgraph_context_.is_ep_ctx_graph && !subgraph_context_.is_ep_ctx_ovir_encapsulated) { - shared_context_ = ep_ctx_handle.GetSharedContextForEpContextSubgraph(subgraph, - session_context_.GetModelPath()); - } else if (session_context_.so_context_enable && session_context_.so_share_ep_contexts) { - shared_context_ = shared_context_manager_.GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath()); - } else { - // Creating a shared context to satisfy backend. It won't be used for weight sharing. - // Don't make it the active share context since we don't actually want to share it. - shared_context_ = shared_context_manager_.GetOrCreateSharedContext(session_context_.GetOutputBinPath()); - } - ORT_ENFORCE(shared_context_, "Could not create a shared context."); - subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) { // return empty if graph has no inputs or if types are not one of FP32/FP16 // else assume the type of the first input @@ -138,7 +126,7 @@ BackendManager::BackendManager(SessionContext& session_context, concrete_backend_ = BackendFactory::MakeBackend(model_proto, session_context_, subgraph_context_, - *shared_context_, + shared_context_, model_stream); } catch (std::string const& msg) { ORT_THROW(msg); @@ -161,13 +149,13 @@ BackendManager::BackendManager(SessionContext& session_context, concrete_backend_ = BackendFactory::MakeBackend(model_proto, session_context_, subgraph_context_, - *shared_context_, + shared_context_, model_stream); } if (ShouldExportEpContext(session_context_, subgraph_context_)) { if (concrete_backend_) { - shared_context_->AddNativeBlob(subgraph_context_.subgraph_name, concrete_backend_->GetOVCompiledModel()); + shared_context_.AddNativeBlob(subgraph_context_.subgraph_name, concrete_backend_->GetOVCompiledModel()); } else { ORT_THROW( "Exporting dynamically compiled models at runtime is not supported. " @@ -193,19 +181,11 @@ void BackendManager::TryExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVi if (session_context_.so_context_embed_mode) { // Internal blob if (include_embed_data) { std::stringstream ss; - shared_context_->Serialize(ss); + shared_context_.Serialize(ss); model_blob_str = std::move(ss).str(); } } else { // External blob - // Build name by combining EpCtx model name (if available) and subgraph name. Model - // name is not available in when creating a session from memory - auto name = session_context_.so_context_file_path.stem().string(); - if (name.empty() && !graph_body_viewer.ModelPath().empty()) { - name = graph_body_viewer.ModelPath().stem().string(); - } - ORT_ENFORCE(!name.empty()); - - model_blob_str = shared_context_->GetBinPath().filename().string(); + model_blob_str = shared_context_.GetBinPath().filename().string(); } auto status = ep_ctx_handle_.AddOVEPCtxNodeToGraph(graph_body_viewer, @@ -521,7 +501,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, if ((session_context_.device_type.find("NPU") != std::string::npos) && (enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) { std::unique_ptr model; - Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, *shared_context_); + Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_); auto model_proto = model->ToProto(); model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); print_model_proto_duration(); @@ -788,7 +768,7 @@ void BackendManager::Compute(OrtKernelContext* context) { dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes, session_context_, subgraph_context_, - *shared_context_, + shared_context_, model_stream); } catch (const OnnxRuntimeException& ex) { // Build option disables fallback to CPU on compilation failures with NPU. @@ -808,7 +788,7 @@ void BackendManager::Compute(OrtKernelContext* context) { dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes, session_context_, subgraph_context_, - *shared_context_, + shared_context_, model_stream); } catch (std::string const& msg) { ORT_THROW(msg); diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h index 64dadb6c2151b..716fe3ef4cc90 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.h +++ b/onnxruntime/core/providers/openvino/backend_manager.h @@ -20,7 +20,7 @@ namespace openvino_ep { class BackendManager { public: BackendManager(SessionContext& session_context, - SharedContextManager& shared_context_manager, + SharedContext& shared_context, const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger, @@ -59,8 +59,7 @@ class BackendManager { SubGraphContext subgraph_context_; EPCtxHandler& ep_ctx_handle_; SessionContext& session_context_; - SharedContextManager& shared_context_manager_; - std::shared_ptr shared_context_; + SharedContext& shared_context_; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index b14e05191dfaa..ebb716a64162c 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -97,11 +97,12 @@ struct SessionContext : ProviderInfo { return onnx_model_path_name.empty() ? so_context_file_path : onnx_model_path_name; } - const std::filesystem::path GetOutputBinPath() const { - std::filesystem::path bin_file_name = so_context_file_path; - if (bin_file_name.empty()) { - bin_file_name = onnx_model_path_name; - } + const std::filesystem::path& GetOutputModelPath() const { + return so_context_file_path.empty() ? onnx_model_path_name : so_context_file_path; + } + + std::filesystem::path GetOutputBinPath() const { + const auto& bin_file_name = GetOutputModelPath(); if (bin_file_name.empty()) { return {}; } diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index 3260d18e9f43c..8f47155d34fa1 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -93,29 +93,6 @@ Status EPCtxHandler::AddOVEPCtxNodeToGraph(const GraphViewer& graph_viewer, return Status::OK(); } -std::shared_ptr EPCtxHandler::GetSharedContextForEpContextSubgraph(const GraphViewer& subgraph_view, const std::filesystem::path& ep_context_path) const { - if (!CheckForOVEPCtxNodeInGraph(subgraph_view)) { - return nullptr; - } - - auto first_index = *subgraph_view.GetNodesInTopologicalOrder().begin(); - auto node = subgraph_view.GetNode(first_index); - ORT_ENFORCE(node != nullptr); - auto& attrs = node->GetAttributes(); - ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) == 1); - const auto& ep_cache_context = attrs.at(EP_CACHE_CONTEXT).s(); - - ORT_ENFORCE(attrs.count(EMBED_MODE) == 1); - bool embed_mode = static_cast(attrs.at(EMBED_MODE).i()); - - std::filesystem::path bin_path{}; - if (!embed_mode) { - bin_path = ep_context_path.parent_path() / ep_cache_context; - } - - return shared_context_manager_->GetOrCreateSharedContext(bin_path); -} - std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const { auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin(); auto node = graph_viewer.GetNode(first_index); @@ -218,10 +195,12 @@ bool EPCtxHandler::CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, return false; } -void EPCtxHandler::Initialize(const std::vector& fused_nodes, const std::filesystem::path& ep_context_dir) { +std::shared_ptr EPCtxHandler::Initialize(const std::vector& fused_nodes, const SessionContext& session_context) { bool has_embed_nodes = false; bool has_non_embed_nodes = false; bool has_main_context = false; + + std::shared_ptr shared_context{}; for (const auto& fused_node_graph : fused_nodes) { const GraphViewer& graph_viewer = fused_node_graph.filtered_graph; @@ -241,28 +220,29 @@ void EPCtxHandler::Initialize(const std::vector(attrs.at(EMBED_MODE).i()); } - has_embed_nodes |= embed_mode; - has_non_embed_nodes |= !embed_mode; bool main_context = true; if (attrs.count(MAIN_CONTEXT) == 1) { main_context = static_cast(attrs.at(MAIN_CONTEXT).i()); } + has_main_context |= main_context; + has_embed_nodes |= embed_mode; + has_non_embed_nodes |= !embed_mode; const std::string& ep_cache_context = attrs.at(EP_CACHE_CONTEXT).s(); if (embed_mode) { std::filesystem::path dummy_path{}; - auto shared_context = shared_context_manager_->GetOrCreateSharedContext(dummy_path); + shared_context = shared_context_manager_->GetOrCreateSharedContext(dummy_path); if (main_context) { ORT_ENFORCE(!ep_cache_context.empty(), "Embedded EP context is indicated but EP_CACHE_CONTEXT attribute is empty."); std::istringstream ss(ep_cache_context); shared_context->Deserialize(ss); } } else { - std::filesystem::path ep_context_path = ep_context_dir / ep_cache_context; + std::filesystem::path ep_context_path = session_context.GetOutputModelPath().parent_path() / ep_cache_context; if (ep_context_path.extension() != ".xml") { - auto shared_context = shared_context_manager_->GetOrCreateSharedContext(ep_context_path); + shared_context = shared_context_manager_->GetOrCreateSharedContext(ep_context_path); shared_context->Deserialize(); } } @@ -272,6 +252,13 @@ void EPCtxHandler::Initialize(const std::vectorGetOrCreateActiveSharedContext(session_context.GetOutputBinPath()); + } + + return shared_context; } } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h index fc2a56c1d0671..fce88005a0605 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h @@ -9,14 +9,12 @@ #include "core/providers/shared_library/provider_api.h" #include "core/framework/execution_provider.h" -#include "ov_bin_manager.h" #include "ov_shared_context.h" +#include "contexts.h" namespace onnxruntime { namespace openvino_ep { -class SharedBinManager; - struct ModelBlobWrapper { ModelBlobWrapper(std::unique_ptr stream, const ov::Tensor& tensor) : stream_(std::move(stream)), tensor_(tensor) {} std::unique_ptr stream_; @@ -38,7 +36,6 @@ class EPCtxHandler { EPCtxHandler(std::string ov_sdk_version, const logging::Logger& logger, std::shared_ptr shared_context_manager); EPCtxHandler(const EPCtxHandler&) = delete; // No copy constructor bool CheckForOVEPCtxNodeInGraph(const GraphViewer& subgraph_view) const; - std::shared_ptr GetSharedContextForEpContextSubgraph(const GraphViewer& subgraph_view, const std::filesystem::path& ep_context_path) const; bool CheckForOVEPCtxNode(const Node& node) const; Status AddOVEPCtxNodeToGraph(const GraphViewer& subgraph_view, const std::string& graph_name, @@ -47,7 +44,7 @@ class EPCtxHandler { std::unique_ptr GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& subgraph_view) const; InlinedVector GetEPCtxNodes() const; bool CheckEPCacheContextAttribute(const GraphViewer& subgraph_view, const std::string& target_attr_extn) const; - void Initialize(const std::vector& fused_nodes, const std::filesystem::path& ep_context_path); + std::shared_ptr Initialize(const std::vector& fused_nodes, const SessionContext& session_context); private: const std::string openvino_sdk_version_; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 6dc7328d696da..a099f85b2a4b9 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -110,22 +110,17 @@ common::Status OpenVINOExecutionProvider::Compile( std::string("Invalid EP context configuration: ") + kOrtSessionOptionEpContextEmbedMode + " must be 0 if " + kOrtSessionOptionShareEpContexts + " is 1."); } - bool is_epctx_model = false; if (!fused_nodes.empty()) { // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get(); session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string(); session_context_.onnx_opset_version = graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); - - // OVIR wrapped in epctx should be treated as source but this code does not - // This corner case is not in use and will be addressed in a future commit - is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0); } - if (is_epctx_model) { - ep_ctx_handle_.Initialize(fused_nodes, session_context_.GetOutputBinPath().parent_path()); - } + shared_context_ = ep_ctx_handle_.Initialize(fused_nodes, session_context_); + ORT_ENFORCE(shared_context_, + "Failed to create or retrieve SharedContext"); struct OpenVINOEPFunctionState { AllocateFunc allocate_func = nullptr; @@ -145,7 +140,7 @@ common::Status OpenVINOExecutionProvider::Compile( // For original model, check if the user wants to export a model with pre-compiled blob auto& backend_manager = backend_managers_.emplace_back(session_context_, - *shared_context_manager_, + *shared_context_, fused_node, graph_body_viewer, logger, @@ -199,11 +194,9 @@ common::Status OpenVINOExecutionProvider::Compile( // bit clunky ideally we should try to fold this into ep context handler if (!session_context_.so_context_embed_mode) { - auto shared_context = shared_context_manager_->GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath()); - shared_context->Serialize(); + shared_context_->Serialize(); if (session_context_.so_stop_share_ep_contexts) { shared_context_manager_->ClearActiveSharedContext(); - shared_context->Clear(); } } } diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index 326f6de30498f..a343ad34cae50 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -81,6 +81,7 @@ class OpenVINOExecutionProvider : public IExecutionProvider { SessionContext session_context_; std::shared_ptr ov_core_; std::shared_ptr shared_context_manager_; + std::shared_ptr shared_context_; std::list backend_managers_; // EP session owns the backend objects EPCtxHandler ep_ctx_handle_; diff --git a/onnxruntime/core/providers/openvino/ov_bin_manager.cc b/onnxruntime/core/providers/openvino/ov_bin_manager.cc index bdab631bb478b..88a50377281bc 100644 --- a/onnxruntime/core/providers/openvino/ov_bin_manager.cc +++ b/onnxruntime/core/providers/openvino/ov_bin_manager.cc @@ -189,13 +189,6 @@ std::unique_ptr BinManager::GetNativeBlobAsStream(const std::strin return std::make_unique(GetNativeBlob(blob_name)); } -void BinManager::Clear() { - std::unique_lock lock(mutex_); - native_blobs_.clear(); - mapped_bin_ = {}; - external_bin_path_.reset(); -} - std::filesystem::path BinManager::GetBinPathForModel(const std::filesystem::path& model_path) { ORT_ENFORCE(!model_path.empty()); return model_path.parent_path() / (model_path.stem().string() + "_" + kOpenVINOExecutionProvider + ".bin"); @@ -215,22 +208,12 @@ void BinManager::Deserialize(std::shared_ptr shared_context) { Deserialize(stream, shared_context); } -bool BinManager::ShouldSerialize(const std::shared_ptr& shared_context) const { - if (shared_context) { - auto metadata = shared_context->GetMetadataCopy(); - if (!metadata.empty()) { - return true; - } - } - return !native_blobs_.empty(); -} - void BinManager::Serialize(std::ostream& stream, std::shared_ptr shared_context) { std::shared_lock ul(mutex_); - if (!ShouldSerialize(shared_context)) { - // nothing to serialize - return; + auto metadata = shared_context ? shared_context->GetMetadataCopy() : SharedContext::Metadata::Map{}; + if (metadata.empty() && native_blobs_.empty()) { + return; // Nothing to serialize } const auto stream_start = stream.tellp(); @@ -259,19 +242,16 @@ void BinManager::Serialize(std::ostream& stream, std::shared_ptr j[BSONFields::kProducer] = BSONFields::kProducerName; // Add weights metadata as a map (from SharedContext if available) - if (shared_context) { - auto metadata = shared_context->GetMetadataCopy(); - if (!metadata.empty()) { - nlohmann::json weights_map = nlohmann::json::object(); - for (const auto& [key, value] : metadata) { - nlohmann::json weight_entry; - weight_entry[BSONFields::kLocation] = value.serialized.location.string(); - weight_entry[BSONFields::kDataOffset] = value.serialized.data_offset; - weight_entry[BSONFields::kSize] = value.serialized.size; - weights_map[key] = weight_entry; - } - j[BSONFields::kWeightsMetadata] = weights_map; + if (!metadata.empty()) { + nlohmann::json weights_map = nlohmann::json::object(); + for (const auto& [key, value] : metadata) { + nlohmann::json weight_entry; + weight_entry[BSONFields::kLocation] = value.serialized.location.string(); + weight_entry[BSONFields::kDataOffset] = value.serialized.data_offset; + weight_entry[BSONFields::kSize] = value.serialized.size; + weights_map[key] = weight_entry; } + j[BSONFields::kWeightsMetadata] = weights_map; } // Add blob metadata with placeholder values as a map (will be updated after writing blobs) @@ -340,6 +320,14 @@ void BinManager::Serialize(std::ostream& stream, std::shared_ptr } void BinManager::Deserialize(std::istream& stream, std::shared_ptr shared_context) { + try { + DeserializeImpl(stream, shared_context); + } catch (const std::exception& e) { + ORT_THROW(e.what(), "\nCould not deserialize binary data. This could mean the bin is corrupted or incompatible. Try re-generating ep context cache."); + } +} + +void BinManager::DeserializeImpl(std::istream& stream, const std::shared_ptr& shared_context) { // Read and validate header header_t header{}; diff --git a/onnxruntime/core/providers/openvino/ov_bin_manager.h b/onnxruntime/core/providers/openvino/ov_bin_manager.h index d6d6ada2d252a..b50cfc460ec96 100644 --- a/onnxruntime/core/providers/openvino/ov_bin_manager.h +++ b/onnxruntime/core/providers/openvino/ov_bin_manager.h @@ -31,7 +31,6 @@ class BinManager { void AddNativeBlob(const std::string& name, const ov::CompiledModel& compiled_model); ov::Tensor GetNativeBlob(const std::string& blob_name); std::unique_ptr GetNativeBlobAsStream(const std::string& blob_name); - void Clear(); // Serialization/Deserialization void Serialize(std::ostream& stream, std::shared_ptr shared_context = nullptr); @@ -65,7 +64,7 @@ class BinManager { } serialized_info; }; - bool ShouldSerialize(const std::shared_ptr& shared_context) const; + void DeserializeImpl(std::istream& stream, const std::shared_ptr& shared_context); mutable std::shared_mutex mutex_; std::optional external_bin_path_; diff --git a/onnxruntime/core/providers/openvino/ov_shared_context.cc b/onnxruntime/core/providers/openvino/ov_shared_context.cc index 84cce6e7e16d4..f48284d0cc974 100644 --- a/onnxruntime/core/providers/openvino/ov_shared_context.cc +++ b/onnxruntime/core/providers/openvino/ov_shared_context.cc @@ -132,14 +132,5 @@ void SharedContext::Deserialize() { bin_manager_.Deserialize(shared_from_this()); } -void SharedContext::Clear() { - // Outside the mutex since bin_manager has it's own lock, and we want to keep lock ordering consistent - // It's ok for clear to not be fully atomic we're primarily interested in internal consistency. - bin_manager_.Clear(); - std::unique_lock lock(mutex_); - weight_files_.clear(); - metadata_.clear(); -} - } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/ov_shared_context.h b/onnxruntime/core/providers/openvino/ov_shared_context.h index c893b64442fa4..aee6d5570d8fa 100644 --- a/onnxruntime/core/providers/openvino/ov_shared_context.h +++ b/onnxruntime/core/providers/openvino/ov_shared_context.h @@ -75,8 +75,6 @@ class SharedContext : public std::enable_shared_from_this { void Serialize(); void Deserialize(); - void Clear(); - std::filesystem::path GetBinPath() const { return bin_manager_.GetExternalBinPath(); } @@ -132,6 +130,7 @@ class SharedContextManager : public WeakSingleton { it->second = std::make_shared(model_path); } active_context_ = it->second; + active_context_path_ = model_path; return it->second; } @@ -146,6 +145,10 @@ class SharedContextManager : public WeakSingleton { void ClearActiveSharedContext() { std::lock_guard lock(mutex_); + if (active_context_) { + contexts_.erase(active_context_path_); + active_context_path_.clear(); + } active_context_ = nullptr; } @@ -153,6 +156,7 @@ class SharedContextManager : public WeakSingleton { mutable std::mutex mutex_; std::unordered_map> contexts_; std::shared_ptr active_context_; + std::filesystem::path active_context_path_; }; } // namespace openvino_ep