Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <fstream>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
Expand Down Expand Up @@ -170,7 +171,10 @@ BackendManager::BackendManager(SessionContext& session_context,
exception_str.find("intel_npu") != std::string::npos) {
// Handle NPU device related errors
#ifndef NDEBUG
ORT_THROW(exception_str + "\nModel needs to be recompiled\n");
std::string suffix = session_context_.so_disable_cpu_ep_fallback ?
"\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" :
"\nModel needs to be recompiled\n";
ORT_THROW(exception_str + suffix);
#else
std::string error_message = "UNKNOWN NPU ERROR";
std::string error_code = "code 0x0";
Expand All @@ -183,7 +187,10 @@ BackendManager::BackendManager(SessionContext& session_context,
if (std::regex_search(exception_str, matches, error_code_pattern)) {
error_code = matches[0];
}
throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n");
std::string suffix = session_context_.so_disable_cpu_ep_fallback ?
"\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" :
"\nModel needs to be recompiled\n";
throw std::runtime_error(error_message + ", " + error_code + suffix);
#endif
} else {
ORT_THROW(exception_str);
Expand Down Expand Up @@ -631,8 +638,8 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
// proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions.
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025))
constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32;
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
external_initializers_offset_and_length.size() > 1 &&
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
external_initializers_offset_and_length.size() > 1 &&
extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE);
#else
const bool include_initializer_data_in_proto = true;
Expand All @@ -642,7 +649,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
auto model = subgraph.CreateModel(logger);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true,
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true,
/*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto);

print_model_proto_duration();
Expand Down Expand Up @@ -881,7 +888,25 @@ void BackendManager::Compute(OrtKernelContext* context) {
ORT_THROW(msg);
}
} else {
ORT_THROW(ex.what());
std::string exception_str = ex.what();
if (session_context_.so_disable_cpu_ep_fallback){
std::string error_message = "UNKNOWN NPU ERROR";
std::string error_code = "code 0x0";
std::regex error_message_pattern(R"(\bZE_\w*\b)");
std::regex error_code_pattern("code 0x[0-9a-fA-F]+");
std::smatch matches;
if (std::regex_search(exception_str, matches, error_message_pattern)) {
error_message = matches[0];
}
if (std::regex_search(exception_str, matches, error_code_pattern)) {
error_code = matches[0];
}
std::string suffix = "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" ;
throw std::runtime_error(error_message + ", " + error_code + suffix);
}
else{
ORT_THROW(exception_str);
}
}
#endif
}
Expand Down
Loading