From 85a77a70a44e24363f9eeb34d890ddde3c5651b3 Mon Sep 17 00:00:00 2001
From: Preetha Veeramalai
Date: Mon, 2 Jun 2025 08:08:15 -0700
Subject: [PATCH] Remove stale IO buffer code

Drop the stale IO_BUFFER_ENABLED path (OpenCL remote-tensor I/O for the GPU
device) and its remote-context plumbing from the CMake configuration, the
basic backend, the provider factory, and the OV interface.

---
 cmake/onnxruntime_providers_openvino.cmake   |   1 -
 .../openvino/backends/basic_backend.cc       | 150 +-----------------
 .../openvino/backends/basic_backend.h        |   7 -
 .../openvino/openvino_provider_factory.cc    |   8 -
 .../core/providers/openvino/ov_interface.cc  |  32 ----
 .../core/providers/openvino/ov_interface.h   |  17 --
 6 files changed, 1 insertion(+), 214 deletions(-)

diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake
index 03f67983c70ab..9e272bd18a497 100644
--- a/cmake/onnxruntime_providers_openvino.cmake
+++ b/cmake/onnxruntime_providers_openvino.cmake
@@ -31,7 +31,6 @@
   list(APPEND OPENVINO_LIB_LIST openvino::frontend::onnx openvino::runtime ${PYTHON_LIBRARIES})

   if ((DEFINED ENV{OPENCL_LIBS}) AND (DEFINED ENV{OPENCL_INCS}) AND onnxruntime_USE_OPENVINO_GPU)
-    add_definitions(-DIO_BUFFER_ENABLED=1)
     list(APPEND OPENVINO_LIB_LIST $ENV{OPENCL_LIBS})
   endif()

diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index e77ff973f3a87..e050967e4bfb7 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -60,27 +60,7 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr
   }

   try {
-    // IO_BUFFER is enabled on GPU HW.
-    // Pre-requisite is provider_option "context" must be set
-#if defined(IO_BUFFER_ENABLED)
-    cl_context ctx = static_cast<cl_context>(session_context_.context);
-    remote_context_ = new ov::intel_gpu::ocl::ClContext(OVCore::Get()->core, ctx);
-    if (subgraph_context_.is_ep_ctx_graph) {
-      exe_network_ = OVCore::Get()->ImportModel(*model_stream,
-                                                remote_context_,
-                                                subgraph_context_.subgraph_name);
-      model_stream.reset();  // Delete stream after it is no longer needed
-    } else {
-      std::string model = model_proto->SerializeAsString();
-      if (!subgraph_context.has_dynamic_input_shape) {
-        model_proto.reset()
-      }
-      auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
-      LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
-      exe_network_ = OVCore::Get()->CompileModel(
-          ov_model, remote_context_, subgraph_context_.subgraph_name);
-    }
-#else  // !IO_BUFFER_ENABLED
+
     auto auto_unified_compile = ((hw_target.find("AUTO") == std::string::npos) ||
                                  (session_context_.OpenVINO_Version.at(0) >= 2024 &&
                                   session_context_.OpenVINO_Version.at(1) > 2));
@@ -117,7 +97,6 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr
       exe_network_ = OVCore::Get()->CompileModel(
           ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
     }
-#endif
     LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
   } catch (const char* msg) {
     ORT_THROW(msg);
   }
@@ -459,114 +438,6 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
   }
 }

-#ifdef IO_BUFFER_ENABLED
-// Wait for Remote Aynchronous inference completion
-void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
-  try {
-    auto graph_input_info = exe_network_.Get().inputs();
-    int input_idx = 0;
-    for (auto input_info_iter = graph_input_info.begin();
-         input_info_iter != graph_input_info.end(); ++input_info_iter) {
-      auto input_names = input_info_iter->get_names();
-      std::string onnx_input_name;
-      std::string input_name;
-      // use names retrieved from original ONNX model to assign the right onnx input name for the graph
-      for (auto it = subgraph_context_.input_names.begin(); it != subgraph_context_.input_names.end(); ++it) {
-        if (it->second == input_idx) {
-          onnx_input_name = it->first;
-          break;
-        }
-      }
-      // using the input name retrieved from ONNX original to match with the input names returned by OV tensors
-      if (input_names.find(onnx_input_name) != input_names.end()) {
-        input_name = onnx_input_name;
-      } else {
-        ORT_THROW(log_tag +
-                  "Input names mismatch between OpenVINO and ONNX. " +
-                  onnx_input_name +
-                  " doesn't exist in the list of OpenVINO input tensor names");
-      }
-      input_idx++;
-      // Kernel Context Input Buffer
-      const auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
-      // If the ORTValue wraps a device pointer
-      auto mem_info = tensor.GetTensorMemoryInfo();
-      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
-        // Get the shared buffer pointer
-        const void* tensor_data = tensor.GetTensorRawData();
-        const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
-        // Create an Input Remote Blob
-        auto input = graph_input_info.at(0);
-        auto remote_blob = remote_context_->create_tensor(
-            input.get_element_type(), input.get_shape(), *shared_buffer_const);
-        ov::Tensor tensor_remote = static_cast<ov::Tensor>(remote_blob);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_remote);
-        infer_request->SetTensor(input_name, tensor_ptr);
-      } else {
-        OVTensorPtr graph_input_blob;
-        graph_input_blob = infer_request->GetTensor(input_name);
-        size_t batch_slice_idx = 0;
-        FillInputBlob(graph_input_blob, batch_slice_idx, input_name, context, subgraph_context_);
-      }
-    }
-
-    // Set the output blob as remote blob
-    auto graph_output_info = exe_network_.Get().outputs();
-    for (auto output_info_iter = graph_output_info.begin();
-         output_info_iter != graph_output_info.end(); ++output_info_iter) {
-      auto output_names = output_info_iter->get_names();
-      std::string onnx_output_name;
-      std::string output_name;
-      bool output_name_found = false;
-      // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-      for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-        onnx_output_name = it->first;
-        if (output_names.find(onnx_output_name) != output_names.end()) {
-          // Assigning the output_name
-          output_name = it->first;
-          output_name_found = true;
-          break;
-        }
-      }
-      if (!output_name_found) {
-        ORT_THROW(
-            log_tag +
-            "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
-            onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
-      }
-
-      size_t batch_size = 1;
-      Ort::UnownedValue tensor = GetOutputTensor(context,
-                                                 batch_size,
-                                                 infer_request,
-                                                 output_name,
-                                                 subgraph_context_.output_names);
-      auto mem_info = tensor.GetTensorMemoryInfo();
-      // Check if ORT Value wraps a device pointer
-      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
-        const void* tensor_data = tensor.GetTensorRawData();
-        const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
-        // Create a shared Blob, set the Infer Request Output Blob
-        auto output = graph_output_info.at(0);
-        auto remote_tensor =
-            remote_context_->create_tensor(output.get_element_type(), output.get_shape(), *shared_buffer_const);
-        ov::Tensor tensor_t = static_cast<ov::Tensor>(remote_tensor);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_t);
-        try {
-          infer_request->SetTensor(output_name, tensor_ptr);
-        } catch (const char* msg) {
-          ORT_THROW(msg);
-        }
-      }
-    }
-
-    // Start Async inference
-    infer_request->StartAsync();
-  } catch (const char* msg) {
-    ORT_THROW(msg);
-  }
-}
-#endif

 // Wait for asynchronous inference completion on an Infer Request object indexed by infer_req_idx
 // and copy the results into a slice location within the batched output buffer indexed by batch_slice_idx
@@ -653,28 +524,11 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
     // Requesting for an idle infer_request from a pool of infer_requests_
     OVInferRequestPtr infer_request;
     infer_request = inferRequestsQueue_->getIdleRequest();
-#ifdef IO_BUFFER_ENABLED
-    if ((session_context_.device_type.find("GPU") != std::string::npos) &&
-        (session_context_.context != nullptr) && session_context_.is_wholly_supported_graph) {
-      try {
-        StartRemoteAsyncInference(context, infer_request);
-      } catch (std::string const& msg) {
-        ORT_THROW(msg);
-      }
-    } else {
-      try {
-        StartAsyncInference(context, infer_request);
-      } catch (std::string const& msg) {
-        ORT_THROW(msg);
-      }
-    }
-#else
     try {
       StartAsyncInference(context, infer_request);
     } catch (const std::runtime_error& e) {
       ORT_THROW(log_tag + " Exception at StartAsyncInference: " + e.what());
     }
-#endif
     try {
       CompleteAsyncInference(context, infer_request);
     } catch (const std::runtime_error& e) {
@@ -696,13 +550,11 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
     // Once the inference is completed, the infer_request becomes free and is placed back into pool of infer_requests_
     inferRequestsQueue_->putIdleRequest(std::move(infer_request));
 #ifndef NDEBUG
-#ifndef IO_BUFFER_ENABLED  // Printing performance counts is disabled when IO_BUFFER_ENABLED
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
       inferRequestsQueue_->printstatus();  // Printing the elements of infer_requests_ vector pool only in debug mode
       std::string& hw_target = session_context_.device_type;
       printPerformanceCounts(std::move(infer_request_), std::cout, hw_target);
     }
-#endif
 #endif
   }
 }

diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 130699abd465b..a19b49e48e513 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -95,10 +95,6 @@ class BasicBackend : public IBackend {
   void SetNumThreads(ov::AnyMap& device_config);
   void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);

-#ifdef IO_BUFFER_ENABLED
-  void StartRemoteAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
-#endif
-
   void CompleteAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);

   SessionContext& session_context_;
@@ -108,9 +104,6 @@ class BasicBackend : public IBackend {
   OVExeNetwork exe_network_;
   std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
   std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
-#if defined IO_BUFFER_ENABLED
-  OVRemoteContextPtr remote_context_;
-#endif
   using ort_tensor_key_t = const std::string;
   std::map ort_ov_tensor_map;

diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index e5526ecd52bb9..0545f6f1a55ec 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -296,14 +296,6 @@ static void ParseProviderInfo(const ProviderOptions& provider_options,
   }
   pi.context = ParseUint64(provider_options, "context");
-#if defined(IO_BUFFER_ENABLED)
-  // a valid context must be provided to enable IO Buffer optimizations
-  if (pi.context == nullptr) {
-#undef IO_BUFFER_ENABLED
-#define IO_BUFFER_ENABLED = 0
-    LOGS_DEFAULT(WARNING) << "Context is not set. Disabling IO Buffer optimization";
-  }
-#endif

   if (provider_options.contains("num_of_threads")) {
     if (!std::all_of(provider_options.at("num_of_threads").begin(),
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index a175ca863d1d1..9b9ce366f7ec9 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -143,38 +143,6 @@ void OVCore::SetCache(const std::string& cache_dir_path) {
   core.set_property(ov::cache_dir(cache_dir_path));
 }

-#ifdef IO_BUFFER_ENABLED
-OVExeNetwork OVCore::CompileModel(std::shared_ptr& model,
-                                  OVRemoteContextPtr context, std::string name) {
-  try {
-    auto obj = core.compile_model(model, *context);
-#ifndef NDEBUG
-    printDebugInfo(obj);
-#endif
-    return OVExeNetwork(obj);
-  } catch (const Exception& e) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what());
-  } catch (...) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph " + name);
-  }
-}
-OVExeNetwork OVCore::ImportModel(std::shared_ptr model_stream,
-                                 OVRemoteContextPtr context, std::string name) {
-  try {
-    auto obj = core.import_model(*model_stream, *context);
-#ifndef NDEBUG
-    printDebugInfo(obj);
-#endif
-    OVExeNetwork exe(obj);
-    return exe;
-  } catch (const Exception& e) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what());
-  } catch (...) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph " + name);
-  }
-}
-#endif
-
 std::vector<std::string> OVCore::GetAvailableDevices() const {
   std::vector<std::string> available_devices = core.get_available_devices();
   return available_devices;
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
index bebe73bd702dd..d47ffd5c3c561 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -15,10 +15,6 @@
 #include "openvino/pass/convert_fp32_to_fp16.hpp"
 #include "openvino/frontend/manager.hpp"

-#ifdef IO_BUFFER_ENABLED
-#include <openvino/runtime/intel_gpu/ocl/ocl.hpp>
-#endif
-
 #include

 namespace onnxruntime {
@@ -33,11 +29,6 @@
 typedef ov::Model OVNetwork;
 typedef std::shared_ptr<OVInferRequest> OVInferRequestPtr;
 typedef std::shared_ptr<ov::Tensor> OVTensorPtr;

-#ifdef IO_BUFFER_ENABLED
-typedef ov::intel_gpu::ocl::ClContext* OVRemoteContextPtr;
-typedef ov::RemoteContext OVRemoteContext;
-#endif
-
 std::optional<bool> queryOVProperty(const std::string& property, const std::string& device_type);

 template
@@ -87,14 +78,6 @@ struct OVCore : WeakSingleton<OVCore> {
                             std::string hw_target,
                             const ov::AnyMap& device_config,
                             std::string name);
-#ifdef IO_BUFFER_ENABLED
-  OVExeNetwork CompileModel(std::shared_ptr& model,
-                            OVRemoteContextPtr context,
-                            std::string name);
-  OVExeNetwork ImportModel(std::shared_ptr model_stream,
-                           OVRemoteContextPtr context,
-                           std::string name);
-#endif
   std::vector<std::string> GetAvailableDevices() const;
   std::vector<std::string> GetAvailableDevices(const std::string& device_type) const;
   void SetCache(const std::string& cache_dir_path);