From 74a7ffc2c3b44f1df0527eff8bef3ec47db2c502 Mon Sep 17 00:00:00 2001
From: TejalKhade28
Date: Fri, 23 May 2025 20:19:01 +0530
Subject: [PATCH 1/3] Catch exception with TDR

---
 .../openvino/backends/basic_backend.cc        | 55 ++++++++++---------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index e77ff973f3a87..9085bfea5f0c4 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -573,36 +573,39 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
 void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
   // Wait for Async inference completion
   try {
-    bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
-                      session_context_.device_type.find("GPU") != std::string::npos;
-
     infer_request->WaitRequest();
+  } catch (const char* msg) {
+    ORT_THROW(msg);
+  }
 
-    if (cpu_or_gpu) {
-      for (const auto& output_info : bindings_->network_outputs_) {
-        OVTensorPtr graph_output_blob;
-        try {
-          graph_output_blob = infer_request->GetTensor(output_info.name);
-        } catch (const char* msg) {
-          ORT_THROW(msg);
-        }
-        size_t batch_size = 1;
-        Ort::UnownedValue output_tensor =
-            GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
-        auto mem_info = output_tensor.GetTensorMemoryInfo();
-        if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
+  bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
+                    session_context_.device_type.find("GPU") != std::string::npos;
+  if (cpu_or_gpu) {
+    for (const auto& output_info : bindings_->network_outputs_) {
+      OVTensorPtr graph_output_blob;
+      try {
+        graph_output_blob = infer_request->GetTensor(output_info.name);
+      } catch (const char* msg) {
+        ORT_THROW(msg);
+      }
+      size_t batch_size = 1;
+      Ort::UnownedValue output_tensor =
+          GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
+      auto mem_info = output_tensor.GetTensorMemoryInfo();
+      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
           return;
-        } else {
-          size_t batch_slice = 0;
-          FillOutputBlob(std::move(graph_output_blob), output_tensor, batch_slice);
-        }
+      } else {
+        size_t batch_slice = 0;
+        FillOutputBlob(std::move(graph_output_blob), output_tensor, batch_slice);
       }
     }
+  }
-    if (!const_outputs_map_.empty()) {
-      for (const auto& item : const_outputs_map_) {
-        const auto& out_name = item.first;
-        auto node = item.second;
+  if (!const_outputs_map_.empty()) {
+    for (const auto& item : const_outputs_map_) {
+      const auto& out_name = item.first;
+      auto node = item.second;
+      try {
         Ort::UnownedValue output_tensor =
             GetOutputTensor(context,
                             out_name,
                             subgraph_context_.output_names,
@@ -613,10 +616,10 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
         } else {
           FillOutputsWithConstantData(std::move(node), output_tensor);
         }
+      } catch (std::string const& msg) {
+        ORT_THROW(msg);
       }
     }
-  } catch (const char* msg) {
-    ORT_THROW(msg);
   }
 }
 
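The hunks above pull infer_request->WaitRequest() out of the block that copies outputs, so a wait that fails (for example when a GPU Timeout Detection and Recovery event resets the device) is rethrown before any output tensor is touched, and the output-copy and constant-output paths each get their own narrow catch. A minimal, self-contained sketch of that wait-then-copy ordering is below; FakeInferRequest, CompleteInference and the "[OpenVINO-EP]" prefix are stand-ins invented for the illustration rather than the provider's real types, so read it as the control flow the patch aims for, not as the backend code itself.

#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Simplified stand-in for an OpenVINO infer request: wait() may throw if the
// device was reset (e.g. a Windows TDR event) while the request was running.
struct FakeInferRequest {
  std::function<void()> wait;
  std::vector<float> output{1.0f, 2.0f, 3.0f};
};

// Mirrors the ordering introduced by the patch: the wait sits in its own
// try/catch and is rethrown immediately; output copying runs only afterwards.
std::vector<float> CompleteInference(FakeInferRequest& request) {
  try {
    request.wait();
  } catch (const std::runtime_error& e) {
    throw std::runtime_error(std::string("[OpenVINO-EP] ") + e.what());
  }
  // Reached only when the wait succeeded, so the outputs are safe to read.
  return request.output;
}

int main() {
  FakeInferRequest ok{[] {}};
  std::cout << "copied " << CompleteInference(ok).size() << " values\n";

  FakeInferRequest reset{[] { throw std::runtime_error("device was reset"); }};
  try {
    CompleteInference(reset);
  } catch (const std::runtime_error& e) {
    std::cout << "caught: " << e.what() << "\n";
  }
  return 0;
}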
From 321351679e02a0e449b363eb3ef05f8539947481 Mon Sep 17 00:00:00 2001
From: TejalKhade28
Date: Sat, 31 May 2025 12:43:09 +0530
Subject: [PATCH 2/3] Handle exceptions during parallel execution with OVEP

---
 .../providers/openvino/backends/basic_backend.cc    | 14 +++++++++++---
 .../providers/openvino/backends/basic_backend.h     | 13 +++++++++++++
 .../core/providers/openvino/ov_interface.cc         | 10 +++++++---
 onnxruntime/core/providers/openvino/ov_interface.h  |  1 +
 onnxruntime/test/perftest/performance_runner.cc     |  9 ++++++---
 5 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 9085bfea5f0c4..8737016cd902c 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -574,8 +574,10 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
   // Wait for Async inference completion
   try {
     infer_request->WaitRequest();
-  } catch (const char* msg) {
-    ORT_THROW(msg);
+  } catch (const std::runtime_error& e) {
+    infer_request->CancelRequest();
+    inferRequestsQueue_->deleteRequest();
+    ORT_THROW(log_tag + e.what());
   }
 
   bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
@@ -653,9 +655,15 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
     }
 
   } else {
-    // Requesting for an idle infer_request from a pool of infer_requests_
     OVInferRequestPtr infer_request;
     infer_request = inferRequestsQueue_->getIdleRequest();
+    if (infer_request == nullptr) {
+      LOGS_DEFAULT(FATAL) << log_tag << "No idle infer request available";
+      ORT_THROW("OpenVINO Execution Provider :: There are no inference requests");
+      return;
+    }
+
+    LOGS_DEFAULT(INFO) << log_tag << "Get Idle Request";
 #ifdef IO_BUFFER_ENABLED
     if ((session_context_.device_type.find("GPU") != std::string::npos) &&
         (session_context_.context != nullptr) && session_context_.is_wholly_supported_graph) {
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 130699abd465b..49fbeeed3af27 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -121,6 +121,7 @@ class InferRequestsQueue {
  public:
   InferRequestsQueue(OVExeNetwork& net, size_t nireq, std::function<void(OVInferRequestPtr)> initializer) {
     OVInferRequestPtr infer_request;
+    live_threads = nireq;
     for (size_t id = 0; id < nireq; id++) {
       infer_request = std::make_shared<OVInferRequest>(net.CreateInferRequest());
       initializer(infer_request);
@@ -152,16 +153,28 @@ class InferRequestsQueue {
 
   OVInferRequestPtr getIdleRequest() {
     std::unique_lock lock(_mutex);
+    std::cout << "get Idle Request " << live_threads << "\n";
+    if (live_threads == 0) {
+      return nullptr;
+    }
+
     _cv.wait(lock, [this] { return infer_requests_.size() > 0; });
     auto request = infer_requests_.at(0);
     infer_requests_.erase(infer_requests_.begin());
     return request;
   }
 
+  void deleteRequest() {
+    std::unique_lock lock(_mutex);
+    live_threads = live_threads - 1;
+    std::cout << "delete Request " << live_threads << "\n";
+  }
+
  private:
  std::mutex _mutex;
  std::condition_variable _cv;
  std::vector<OVInferRequestPtr> infer_requests_;
+  int live_threads;
 };
 
 }  // namespace openvino_ep
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index a175ca863d1d1..87da0ade21551 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -294,12 +294,16 @@ void OVInferRequest::Infer() {
 }
 
 void OVInferRequest::WaitRequest() {
+  ovInfReq.wait();
+}
+
+void OVInferRequest::CancelRequest() {
   try {
-    ovInfReq.wait();
+    ovInfReq.cancel();
   } catch (const Exception& e) {
-    ORT_THROW(log_tag + " Wait Model Failed: " + e.what());
+    ORT_THROW(log_tag + " Cancel Model Failed: " + e.what());
   } catch (...) {
-    ORT_THROW(log_tag + " Wait Mode Failed");
+    ORT_THROW(log_tag + " Cancel Mode Failed");
   }
 }
 
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
index bebe73bd702dd..079426e2d67fb 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -122,6 +122,7 @@ class OVInferRequest {
   void StartAsync();
   void Infer();
   void WaitRequest();
+  void CancelRequest();
   void QueryStatus();
   explicit OVInferRequest(ov::InferRequest obj) : ovInfReq(std::move(obj)) {}
   OVInferRequest() : ovInfReq(ov::InferRequest()) {}
diff --git a/onnxruntime/test/perftest/performance_runner.cc b/onnxruntime/test/perftest/performance_runner.cc
index faf0c34193717..8ec9694227c14 100644
--- a/onnxruntime/test/perftest/performance_runner.cc
+++ b/onnxruntime/test/perftest/performance_runner.cc
@@ -203,8 +203,9 @@ Status PerformanceRunner::RunParallelDuration() {
     counter++;
     tpool->Schedule([this, &counter, &m, &cv]() {
       auto status = RunOneIteration();
-      if (!status.IsOK())
+      if (!status.IsOK()) {
         std::cerr << status.ErrorMessage();
+      }
       // Simplified version of Eigen::Barrier
       std::lock_guard lg(m);
       counter--;
@@ -216,8 +217,10 @@ Status PerformanceRunner::RunParallelDuration() {
   } while (duration_seconds.count() < performance_test_config_.run_config.duration_in_seconds);
 
   // Join
-  std::unique_lock lock(m);
-  cv.wait(lock, [&counter]() { return counter == 0; });
+  tpool->Schedule([this, &counter, &m, &cv]() {
+    std::unique_lock lock(m);
+    cv.wait(lock, [&counter]() { return counter == 0; });
+  });
 
   return Status::OK();
 }
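Patch 2 pairs the new OVInferRequest::CancelRequest() with bookkeeping in InferRequestsQueue: a request whose wait failed is cancelled, retired through deleteRequest(), and never handed back out, and BasicBackend::Infer() gives up once getIdleRequest() reports that the pool is exhausted. The sketch below condenses that bookkeeping into a standalone class; RequestPool, RequestPtr and the extra "pool drained" clause in the wait predicate are assumptions made for the illustration (the patch itself only waits for infer_requests_ to become non-empty), so it shows the intended behaviour rather than the exact OVEP code.

#include <condition_variable>
#include <iostream>
#include <memory>
#include <mutex>
#include <vector>

// Condensed sketch of the live-request bookkeeping added in this patch.
// RequestPtr stands in for OVInferRequestPtr.
using RequestPtr = std::shared_ptr<int>;

class RequestPool {
 public:
  explicit RequestPool(size_t nireq) : live_requests_(nireq) {
    for (size_t id = 0; id < nireq; ++id) {
      requests_.push_back(std::make_shared<int>(static_cast<int>(id)));
    }
  }

  // Returns nullptr once every request has been retired, so callers can stop
  // submitting work instead of blocking forever.
  RequestPtr GetIdleRequest() {
    std::unique_lock<std::mutex> lock(mutex_);
    // Assumption: also wake up when the pool has drained, not only when a
    // request becomes idle again.
    cv_.wait(lock, [this] { return !requests_.empty() || live_requests_ == 0; });
    if (live_requests_ == 0) return nullptr;
    RequestPtr request = requests_.front();
    requests_.erase(requests_.begin());
    return request;
  }

  // Put a request back after a successful inference.
  void PutIdleRequest(RequestPtr request) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      requests_.push_back(std::move(request));
    }
    cv_.notify_one();
  }

  // Retire a request whose wait failed (e.g. after a device reset); it is not
  // returned to the pool.
  void DeleteRequest() {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      --live_requests_;
    }
    cv_.notify_all();
  }

 private:
  std::mutex mutex_;
  std::condition_variable cv_;
  std::vector<RequestPtr> requests_;
  size_t live_requests_;
};

int main() {
  RequestPool pool(1);
  RequestPtr first = pool.GetIdleRequest();  // hand out the only request
  std::cout << "got request " << *first << "\n";
  pool.DeleteRequest();                      // pretend its wait failed
  std::cout << (pool.GetIdleRequest() == nullptr ? "pool exhausted\n" : "got a request\n");
  return 0;
}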
From fd6bcac279ebade813b8c6f412c82d37bfa211f9 Mon Sep 17 00:00:00 2001
From: TejalKhade28
Date: Mon, 2 Jun 2025 15:48:33 +0530
Subject: [PATCH 3/3] Remove IO Buffer Implementation

---
 cmake/onnxruntime_providers_openvino.cmake    |   5 -
 .../openvino/backends/basic_backend.cc        | 148 ------------------
 .../openvino/backends/basic_backend.h         |   9 --
 .../core/providers/openvino/ov_interface.cc   |  32 ----
 .../core/providers/openvino/ov_interface.h    |  19 ---
 5 files changed, 213 deletions(-)

diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake
index 03f67983c70ab..d7cb2d5ea0d0f 100644
--- a/cmake/onnxruntime_providers_openvino.cmake
+++ b/cmake/onnxruntime_providers_openvino.cmake
@@ -30,11 +30,6 @@ endif()
 
   list(APPEND OPENVINO_LIB_LIST openvino::frontend::onnx openvino::runtime ${PYTHON_LIBRARIES})
 
-  if ((DEFINED ENV{OPENCL_LIBS}) AND (DEFINED ENV{OPENCL_INCS}) AND onnxruntime_USE_OPENVINO_GPU)
-    add_definitions(-DIO_BUFFER_ENABLED=1)
-    list(APPEND OPENVINO_LIB_LIST $ENV{OPENCL_LIBS})
-  endif()
-
   source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
 
   onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 8737016cd902c..dedb6da1bae58 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -62,25 +62,6 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr
   try {
     // IO_BUFFER is enabled on GPU HW.
    // Pre-requisite is provider_option "context" must be set
-#if defined(IO_BUFFER_ENABLED)
-    cl_context ctx = static_cast<cl_context>(session_context_.context);
-    remote_context_ = new ov::intel_gpu::ocl::ClContext(OVCore::Get()->core, ctx);
-    if (subgraph_context_.is_ep_ctx_graph) {
-      exe_network_ = OVCore::Get()->ImportModel(*model_stream,
-                                                remote_context_,
-                                                subgraph_context_.subgraph_name);
-      model_stream.reset();  // Delete stream after it is no longer needed
-    } else {
-      std::string model = model_proto->SerializeAsString();
-      if (!subgraph_context.has_dynamic_input_shape) {
-        model_proto.reset();
-      }
-      auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
-      LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
-      exe_network_ = OVCore::Get()->CompileModel(
-          ov_model, remote_context_, subgraph_context_.subgraph_name);
-    }
-#else  // !IO_BUFFER_ENABLED
     auto auto_unified_compile = ((hw_target.find("AUTO") == std::string::npos) ||
                                  (session_context_.OpenVINO_Version.at(0) >= 2024 &&
                                   session_context_.OpenVINO_Version.at(1) > 2));
@@ -117,7 +98,6 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr
       exe_network_ = OVCore::Get()->CompileModel(
           ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
     }
-#endif
     LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
   } catch (const char* msg) {
     ORT_THROW(msg);
@@ -459,115 +439,6 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
   }
 }
 
-#ifdef IO_BUFFER_ENABLED
-// Wait for Remote Aynchronous inference completion
-void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
-  try {
-    auto graph_input_info = exe_network_.Get().inputs();
-    int input_idx = 0;
-    for (auto input_info_iter = graph_input_info.begin();
-         input_info_iter != graph_input_info.end(); ++input_info_iter) {
-      auto input_names = input_info_iter->get_names();
-      std::string onnx_input_name;
-      std::string input_name;
-      // use names retrieved from original ONNX model to assign the right onnx input name for the graph
-      for (auto it = subgraph_context_.input_names.begin(); it != subgraph_context_.input_names.end(); ++it) {
-        if (it->second == input_idx) {
-          onnx_input_name = it->first;
-          break;
-        }
-      }
-      // using the input name retrieved from ONNX original to match with the input names returned by OV tensors
-      if (input_names.find(onnx_input_name) != input_names.end()) {
-        input_name = onnx_input_name;
-      } else {
-        ORT_THROW(log_tag +
-                  "Input names mismatch between OpenVINO and ONNX. " +
-                  onnx_input_name +
-                  " doesn't exist in the list of OpenVINO input tensor names");
-      }
-      input_idx++;
-      // Kernel Context Input Buffer
-      const auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
-      // If the ORTValue wraps a device pointer
-      auto mem_info = tensor.GetTensorMemoryInfo();
-      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
-        // Get the shared buffer pointer
-        const void* tensor_data = tensor.GetTensorRawData();
-        const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
-        // Create an Input Remote Blob
-        auto input = graph_input_info.at(0);
-        auto remote_blob = remote_context_->create_tensor(
-            input.get_element_type(), input.get_shape(), *shared_buffer_const);
-        ov::Tensor tensor_remote = static_cast<ov::Tensor>(remote_blob);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_remote);
-        infer_request->SetTensor(input_name, tensor_ptr);
-      } else {
-        OVTensorPtr graph_input_blob;
-        graph_input_blob = infer_request->GetTensor(input_name);
-        size_t batch_slice_idx = 0;
-        FillInputBlob(graph_input_blob, batch_slice_idx, input_name, context, subgraph_context_);
-      }
-    }
-
-    // Set the output blob as remote blob
-    auto graph_output_info = exe_network_.Get().outputs();
-    for (auto output_info_iter = graph_output_info.begin();
-         output_info_iter != graph_output_info.end(); ++output_info_iter) {
-      auto output_names = output_info_iter->get_names();
-      std::string onnx_output_name;
-      std::string output_name;
-      bool output_name_found = false;
-      // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-      for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-        onnx_output_name = it->first;
-        if (output_names.find(onnx_output_name) != output_names.end()) {
-          // Assigning the output_name
-          output_name = it->first;
-          output_name_found = true;
-          break;
-        }
-      }
-      if (!output_name_found) {
-        ORT_THROW(
-            log_tag +
-            "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
-            onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
-      }
-
-      size_t batch_size = 1;
-      Ort::UnownedValue tensor = GetOutputTensor(context,
-                                                 batch_size,
-                                                 infer_request,
-                                                 output_name,
-                                                 subgraph_context_.output_names);
-      auto mem_info = tensor.GetTensorMemoryInfo();
-      // Check if ORT Value wraps a device pointer
-      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
-        const void* tensor_data = tensor.GetTensorRawData();
-        const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
-        // Create a shared Blob, set the Infer Request Output Blob
-        auto output = graph_output_info.at(0);
-        auto remote_tensor =
-            remote_context_->create_tensor(output.get_element_type(), output.get_shape(), *shared_buffer_const);
-        ov::Tensor tensor_t = static_cast<ov::Tensor>(remote_tensor);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_t);
-        try {
-          infer_request->SetTensor(output_name, tensor_ptr);
-        } catch (const char* msg) {
-          ORT_THROW(msg);
-        }
-      }
-    }
-
-    // Start Async inference
-    infer_request->StartAsync();
-  } catch (const char* msg) {
-    ORT_THROW(msg);
-  }
-}
-#endif
-
 // Wait for asynchronous inference completion on an Infer Request object indexed by infer_req_idx
 // and copy the results into a slice location within the batched output buffer indexed by batch_slice_idx
 void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
@@ -664,28 +535,11 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
     }
 
     LOGS_DEFAULT(INFO) << log_tag << "Get Idle Request";
-#ifdef IO_BUFFER_ENABLED
-    if ((session_context_.device_type.find("GPU") != std::string::npos) &&
-        (session_context_.context != nullptr) && session_context_.is_wholly_supported_graph) {
-      try {
-        StartRemoteAsyncInference(context, infer_request);
-      } catch (std::string const& msg) {
-        ORT_THROW(msg);
-      }
-    } else {
-      try {
-        StartAsyncInference(context, infer_request);
-      } catch (std::string const& msg) {
-        ORT_THROW(msg);
-      }
-    }
-#else
     try {
       StartAsyncInference(context, infer_request);
     } catch (const std::runtime_error& e) {
       ORT_THROW(log_tag + " Exception at StartAsyncInference: " + e.what());
     }
-#endif
     try {
       CompleteAsyncInference(context, infer_request);
     } catch (const std::runtime_error& e) {
@@ -707,13 +561,11 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
     // Once the inference is completed, the infer_request becomes free and is placed back into pool of infer_requests_
     inferRequestsQueue_->putIdleRequest(std::move(infer_request));
 #ifndef NDEBUG
-#ifndef IO_BUFFER_ENABLED  // Printing performance counts is disabled when IO_BUFFER_ENABLED
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
       inferRequestsQueue_->printstatus();  // Printing the elements of infer_requests_ vector pool only in debug mode
       std::string& hw_target = session_context_.device_type;
       printPerformanceCounts(std::move(infer_request_), std::cout, hw_target);
     }
-#endif
 #endif
   }
 }
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 49fbeeed3af27..697c088a80620 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -94,11 +94,6 @@ class BasicBackend : public IBackend {
   void EnableStreams();
   void SetNumThreads(ov::AnyMap& device_config);
   void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
-
-#ifdef IO_BUFFER_ENABLED
-  void StartRemoteAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
-#endif
-
   void CompleteAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
 
   SessionContext& session_context_;
@@ -108,10 +103,6 @@ class BasicBackend : public IBackend {
   OVExeNetwork exe_network_;
   std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
   std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
-#if defined IO_BUFFER_ENABLED
-  OVRemoteContextPtr remote_context_;
-#endif
-
   using ort_tensor_key_t = const std::string;
   std::map ort_ov_tensor_map;
   std::unique_ptr bindings_;
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index 87da0ade21551..0024a5e121bbf 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -143,38 +143,6 @@ void OVCore::SetCache(const std::string& cache_dir_path) {
   core.set_property(ov::cache_dir(cache_dir_path));
 }
-#ifdef IO_BUFFER_ENABLED
-OVExeNetwork OVCore::CompileModel(std::shared_ptr& model,
-                                  OVRemoteContextPtr context, std::string name) {
-  try {
-    auto obj = core.compile_model(model, *context);
-#ifndef NDEBUG
-    printDebugInfo(obj);
-#endif
-    return OVExeNetwork(obj);
-  } catch (const Exception& e) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what());
-  } catch (...) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph " + name);
-  }
-}
-OVExeNetwork OVCore::ImportModel(std::shared_ptr model_stream,
-                                 OVRemoteContextPtr context, std::string name) {
-  try {
-    auto obj = core.import_model(*model_stream, *context);
-#ifndef NDEBUG
-    printDebugInfo(obj);
-#endif
-    OVExeNetwork exe(obj);
-    return exe;
-  } catch (const Exception& e) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what());
-  } catch (...) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph " + name);
-  }
-}
-#endif
-
 std::vector<std::string> OVCore::GetAvailableDevices() const {
   std::vector<std::string> available_devices = core.get_available_devices();
   return available_devices;
 }
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
index 079426e2d67fb..866f4a02f7780 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -14,11 +14,6 @@
 #include "openvino/runtime/intel_npu/properties.hpp"
 #include "openvino/pass/convert_fp32_to_fp16.hpp"
 #include "openvino/frontend/manager.hpp"
-
-#ifdef IO_BUFFER_ENABLED
-#include <openvino/runtime/intel_gpu/ocl/ocl.hpp>
-#endif
-
 #include
 
 namespace onnxruntime {
@@ -32,12 +27,6 @@ typedef ov::ProfilingInfo OVProfilingInfo;
 typedef ov::Model OVNetwork;
 typedef std::shared_ptr<OVInferRequest> OVInferRequestPtr;
 typedef std::shared_ptr<ov::Tensor> OVTensorPtr;
-
-#ifdef IO_BUFFER_ENABLED
-typedef ov::intel_gpu::ocl::ClContext* OVRemoteContextPtr;
-typedef ov::RemoteContext OVRemoteContext;
-#endif
-
 std::optional queryOVProperty(const std::string& property, const std::string& device_type);
 
 template
@@ -87,14 +76,6 @@ struct OVCore : WeakSingleton<OVCore> {
                             std::string hw_target,
                             const ov::AnyMap& device_config,
                             std::string name);
-#ifdef IO_BUFFER_ENABLED
-  OVExeNetwork CompileModel(std::shared_ptr& model,
-                            OVRemoteContextPtr context,
-                            std::string name);
-  OVExeNetwork ImportModel(std::shared_ptr model_stream,
-                           OVRemoteContextPtr context,
-                           std::string name);
-#endif
   std::vector<std::string> GetAvailableDevices() const;
   std::vector<std::string> GetAvailableDevices(const std::string& device_type) const;
   void SetCache(const std::string& cache_dir_path);
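The performance_runner.cc hunk in patch 2 moves the final counter wait onto the thread pool so a failed iteration cannot leave the harness blocked. Below is a self-contained sketch of the same simplified Eigen::Barrier idea with the join kept on the submitting thread and the decrement made unconditional, so the wait can neither hang nor outlive the locals it captures by reference; plain std::thread stands in for onnxruntime's thread pool and run_one_iteration for RunOneIteration(), which makes this an illustrative alternative under those assumptions rather than the patch's code.

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <vector>

// Sketch of the simplified Eigen::Barrier pattern in RunParallelDuration():
// every iteration decrements the counter on success and on failure, and the
// submitting thread waits in place, so counter, m and cv stay alive for the
// workers. std::thread stands in for the onnxruntime thread pool here.
int main() {
  std::mutex m;
  std::condition_variable cv;
  int counter = 0;

  auto run_one_iteration = [](int i) {
    if (i == 2) throw std::runtime_error("inference failed");  // simulate an EP error
  };

  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    {
      std::lock_guard<std::mutex> lg(m);
      counter++;
    }
    workers.emplace_back([&, i]() {
      try {
        run_one_iteration(i);
      } catch (const std::exception& e) {
        std::cerr << "iteration " << i << ": " << e.what() << "\n";
      }
      // Decrement unconditionally so the join below cannot hang.
      std::lock_guard<std::mutex> lg(m);
      counter--;
      cv.notify_all();
    });
  }

  // Join on the submitting thread.
  std::unique_lock<std::mutex> lock(m);
  cv.wait(lock, [&counter]() { return counter == 0; });
  lock.unlock();

  for (auto& t : workers) {
    t.join();
  }
  std::cout << "all iterations accounted for" << std::endl;
  return 0;
}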