From 7bb85c3dfb87f2f6264338fdb53168a44c958975 Mon Sep 17 00:00:00 2001 From: RajeevSekar Date: Mon, 13 Oct 2025 12:16:12 +0530 Subject: [PATCH 1/2] fixed NPU exception message when CPU fallback is disabled --- .../providers/openvino/backend_manager.cc | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index b1cc42cc66ce8..2b44e39f92ac1 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -167,7 +167,10 @@ BackendManager::BackendManager(SessionContext& session_context, exception_str.find("intel_npu") != std::string::npos) { // Handle NPU device related errors #ifndef NDEBUG - ORT_THROW(exception_str + "\nModel needs to be recompiled\n"); + std::string suffix = session_context_.so_disable_cpu_ep_fallback ? + "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : + "\nModel needs to be recompiled\n"; + ORT_THROW(exception_str + suffix); #else std::string error_message = "UNKNOWN NPU ERROR"; std::string error_code = "code 0x0"; @@ -180,7 +183,10 @@ BackendManager::BackendManager(SessionContext& session_context, if (std::regex_search(exception_str, matches, error_code_pattern)) { error_code = matches[0]; } - throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n"); + std::string suffix = session_context_.so_disable_cpu_ep_fallback ? + "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : + "\nModel needs to be recompiled\n"; + throw std::runtime_error(error_message + ", " + error_code + suffix); #endif } else { ORT_THROW(exception_str); @@ -628,8 +634,8 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, // proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions. #if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025)) constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32; - const bool include_initializer_data_in_proto = !(session_context_.has_external_weights && - external_initializers_offset_and_length.size() > 1 && + const bool include_initializer_data_in_proto = !(session_context_.has_external_weights && + external_initializers_offset_and_length.size() > 1 && extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE); #else const bool include_initializer_data_in_proto = true; @@ -639,7 +645,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); - subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, + subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, /*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); print_model_proto_duration(); @@ -878,7 +884,25 @@ void BackendManager::Compute(OrtKernelContext* context) { ORT_THROW(msg); } } else { - ORT_THROW(ex.what()); + std::string exception_str = ex.what(); + if (session_context_.so_disable_cpu_ep_fallback){ + std::string error_message = "UNKNOWN NPU ERROR"; + std::string error_code = "code 0x0"; + std::regex error_message_pattern(R"(\bZE_\w*\b)"); + std::regex error_code_pattern("code 0x[0-9a-fA-F]+"); + std::smatch matches; + if (std::regex_search(exception_str, matches, error_message_pattern)) { + error_message = matches[0]; + } + if (std::regex_search(exception_str, matches, error_code_pattern)) { + error_code = matches[0]; + } + std::string suffix = "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" ; + throw std::runtime_error(error_message + ", " + error_code + suffix); + } + else{ + ORT_THROW(exception_str); + } } #endif } From e68e84e112f98c67b4edb7ce61239ab61e3b1f07 Mon Sep 17 00:00:00 2001 From: RajeevSekar Date: Tue, 14 Oct 2025 14:38:39 +0530 Subject: [PATCH 2/2] fixed lint issues --- onnxruntime/core/providers/openvino/backend_manager.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 2b44e39f92ac1..6661af1dcd0b2 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include