Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 45 additions & 64 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,35 @@ ov::CompiledModel BackendManager::GetOVCompiledModel() {
return ov::CompiledModel();
}

// Decides whether the compiled blob for a subgraph should be exported as an EPContext node.
// Export is only considered when so_context_enable is set on the session. Beyond that, the
// subgraph qualifies if it is flagged as OVIR-encapsulated, or if it is not an EP context
// graph at all.
static bool ShouldExportEpContext(const SessionContext& session_context, const SubGraphContext& subgraph_context) {
  if (!session_context.so_context_enable) {
    return false;
  }
  if (subgraph_context.is_ep_ctx_ovir_encapsulated) {
    return true;
  }
  return !subgraph_context.is_ep_ctx_graph;
}

BackendManager::BackendManager(SessionContext& session_context,
SharedContext& shared_context,
SharedContextManager& shared_context_manager,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger,
EPCtxHandler& ep_ctx_handle) : ep_ctx_handle_(ep_ctx_handle),
session_context_(session_context),
shared_context_{shared_context} {
shared_context_manager_(shared_context_manager) {
subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph);
// If the graph contains an OVIR-wrapped node, we check if it has a matching xml file name attribute
subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph,
session_context_.onnx_model_path_name.filename().replace_extension("xml").string());

if (subgraph_context_.is_ep_ctx_graph && !subgraph_context_.is_ep_ctx_ovir_encapsulated) {
shared_context_ = ep_ctx_handle.GetSharedContextForEpContextSubgraph(subgraph,
session_context_.GetModelPath());
} else if (session_context_.so_context_enable && session_context_.so_share_ep_contexts) {
shared_context_ = shared_context_manager_.GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath());
} else {
// Creating a shared context to satisfy backend. It won't be used for weight sharing.
// Don't make it the active shared context since we don't actually want to share it.
shared_context_ = shared_context_manager_.GetOrCreateSharedContext(session_context_.GetOutputBinPath());
}
ORT_ENFORCE(shared_context_, "Could not create a shared context.");

subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) {
// return empty if graph has no inputs or if types are not one of FP32/FP16
// else assume the type of the first input
Expand Down Expand Up @@ -107,23 +123,6 @@ BackendManager::BackendManager(SessionContext& session_context,
}
std::string device_type = session_context_.device_type;

auto& sw = shared_context_.shared_weights;
if (session_context_.so_share_ep_contexts && !sw.metadata.empty()) {
std::filesystem::path weight_filename = session_context_.onnx_model_path_name.parent_path();
if (sw.external_weight_filename.empty()) {
// Reasonable assumption that all metadata entries have the same external file location
sw.external_weight_filename = sw.metadata.begin()->second.location;
}
weight_filename /= sw.external_weight_filename;
std::ifstream weight_file(weight_filename);

ORT_ENFORCE(weight_file, "Initializer file not found: ", weight_filename.string());
if (!sw.mapped_weights) {
sw.mapped_weights = std::make_unique<SharedContext::SharedWeights::WeightsFile>(weight_filename);
}
backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights);
}

if (subgraph_context_.has_dynamic_input_shape) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if ((!session_context_.disable_dynamic_shapes &&
Expand All @@ -138,7 +137,7 @@ BackendManager::BackendManager(SessionContext& session_context,
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
session_context_,
subgraph_context_,
shared_context_,
*shared_context_,
model_stream);
} catch (std::string const& msg) {
ORT_THROW(msg);
Expand All @@ -162,7 +161,7 @@ BackendManager::BackendManager(SessionContext& session_context,
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
session_context_,
subgraph_context_,
shared_context_,
*shared_context_,
model_stream);
} catch (const OnnxRuntimeException& ex) {
std::string exception_str = ex.what();
Expand Down Expand Up @@ -193,15 +192,15 @@ BackendManager::BackendManager(SessionContext& session_context,
}
}
}
if (session_context_.so_context_enable &&
(subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) {

if (ShouldExportEpContext(session_context_, subgraph_context_)) {
if (concrete_backend_) {
auto status = onnxruntime::openvino_ep::BackendManager::ExportCompiledBlobAsEPCtxNode(subgraph);
if (!status.IsOK()) {
ORT_THROW(status);
}
shared_context_->AddNativeBlob(subgraph_context_.subgraph_name, concrete_backend_->GetOVCompiledModel());
} else {
ORT_THROW("[OpenVINO-EP] Cannot export compiled blob as EPCtx Node: Backend not initialized.");
ORT_THROW(
"Exporting dynamically compiled models at runtime is not supported. "
"Cannot export blobs of dynamic models that request static shape inference. "
"To export this model, set disable_dynamic_shapes to False");
}
}
}
Expand All @@ -210,25 +209,20 @@ BackendManager::BackendManager(SessionContext& session_context,
// precompiled blob is set. If that's the case:
// By default, create model in embed mode where the blob stream is exported as data within
// the EPContext node.
Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer) {
if (session_context_.disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape) {
std::string exception_str =
"Exporting dynamically compiled models at runtime is not supported. "
"Cannot export blobs of dynamic models that request static shape inference. "
"To export this model, set disable_dynamic_shapes to False";
ORT_THROW(exception_str);
void BackendManager::TryExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer, bool include_embed_data) {
if (!ShouldExportEpContext(session_context_, subgraph_context_) || !concrete_backend_) {
return;
}

// If embed_mode, then pass on the serialized blob
// If not embed_mode, dump the blob here and only pass on the path to the blob
std::string model_blob_str;
auto compiled_model = concrete_backend_->GetOVCompiledModel();
if (session_context_.so_context_embed_mode) { // Internal blob
std::ostringstream model_blob_stream;
compiled_model.export_model(model_blob_stream);
model_blob_str = std::move(model_blob_stream).str();
if (model_blob_str.empty()) {
ORT_THROW("Model blob stream is empty after exporting the compiled model.");
if (include_embed_data) {
std::stringstream ss;
shared_context_->Serialize(ss);
model_blob_str = std::move(ss).str();
}
} else { // External blob
// Build name by combining EpCtx model name (if available) and subgraph name. Model
Expand All @@ -238,30 +232,17 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
name = graph_body_viewer.ModelPath().stem().string();
}
ORT_ENFORCE(!name.empty());
name += "_" + subgraph_context_.subgraph_name;

std::filesystem::path blob_filename = session_context_.so_context_file_path;
if (blob_filename.empty()) {
blob_filename = session_context_.onnx_model_path_name;
}
blob_filename = blob_filename.parent_path() / (name + ".blob");
std::ofstream blob_file(blob_filename,
std::ios::out | std::ios::trunc | std::ios::binary);
if (!blob_file) {
std::ostringstream err_msg;
err_msg << "Unable to open file for epctx model dump: " << blob_filename;
ORT_THROW(err_msg.str());
}
compiled_model.export_model(blob_file);
model_blob_str = blob_filename.filename().string();
model_blob_str = shared_context_->GetBinPath().filename().string();
}

ORT_RETURN_IF_ERROR(ep_ctx_handle_.AddOVEPCtxNodeToGraph(graph_body_viewer,
subgraph_context_.subgraph_name,
session_context_.so_context_embed_mode,
std::move(model_blob_str)));

return Status::OK();
auto status = ep_ctx_handle_.AddOVEPCtxNodeToGraph(graph_body_viewer,
subgraph_context_.subgraph_name,
session_context_.so_context_embed_mode,
std::move(model_blob_str));
if (!status.IsOK()) {
ORT_THROW("[OpenVINO-EP] Failed to add OVEP EPContext node to the graph: " + status.ErrorMessage());
}
}

bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
Expand Down Expand Up @@ -568,7 +549,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
if ((session_context_.device_type.find("NPU") != std::string::npos) &&
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
std::unique_ptr<onnxruntime::Model> model;
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_.shared_weights);
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, *shared_context_);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
print_model_proto_duration();
Expand Down Expand Up @@ -835,7 +816,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
session_context_,
subgraph_context_,
shared_context_,
*shared_context_,
model_stream);
} catch (const OnnxRuntimeException& ex) {
// Build option disables fallback to CPU on compilation failures with NPU.
Expand All @@ -855,7 +836,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
session_context_,
subgraph_context_,
shared_context_,
*shared_context_,
model_stream);
} catch (std::string const& msg) {
ORT_THROW(msg);
Expand Down
7 changes: 4 additions & 3 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ namespace openvino_ep {
class BackendManager {
public:
BackendManager(SessionContext& session_context,
SharedContext& shared_context,
SharedContextManager& shared_context_manager,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger,
EPCtxHandler& ctx_handle);
void Compute(OrtKernelContext* context);
void ShutdownBackendManager();
SessionContext& GetSessionContext();
Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph);
void TryExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph, bool include_embed_data);
ov::CompiledModel GetOVCompiledModel();
void RewindKVCache(size_t index);

Expand Down Expand Up @@ -59,7 +59,8 @@ class BackendManager {
SubGraphContext subgraph_context_;
EPCtxHandler& ep_ctx_handle_;
SessionContext& session_context_;
SharedContext& shared_context_;
SharedContextManager& shared_context_manager_;
std::shared_ptr<SharedContext> shared_context_;
};

} // namespace openvino_ep
Expand Down
Loading
Loading