From 3eaaa8d7a71b581b0d483a4d73a8012f150e1b45 Mon Sep 17 00:00:00 2001
From: "Peng, Bo" <bo.peng@intel.com>
Date: Fri, 31 Oct 2025 10:28:33 +0800
Subject: [PATCH 1/5] Disable bfloat16 conversion when the graph is a single
 Cast-to-bfloat16 node (unit test case)

---
 onnxruntime/core/providers/openvino/backend_manager.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 74999ab10a67d..11667c73cb781 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -598,7 +598,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
-  } else if (IsModelBF16(subgraph)) {
+  } else if (IsModelBF16(subgraph) && subgraph.GetNodesInTopologicalOrder().size() > 1) {  // don't apply conversion when single cast node graph (unit test case)
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP bfloat16->float16 optimization pass is enabled";
     std::unique_ptr<onnxruntime::Model> model;
     Status status = bfloat16_fix::Transform(subgraph, logger, model);

From 18278585aa5bbfed73567d71dbb57ee24d1e8e9e Mon Sep 17 00:00:00 2001
From: "Peng, Bo" <bo.peng@intel.com>
Date: Mon, 3 Nov 2025 15:28:00 +0800
Subject: [PATCH 2/5] Insert a Cast(to: BFloat16) before bfloat16 output
 nodes so users keep their original bf16 output tensors

---
 .../providers/openvino/backend_manager.cc     |  2 +-
 .../qdq_transformations/qdq_scales_fix.cpp    | 40 +++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 11667c73cb781..74999ab10a67d 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -598,7 +598,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
-  } else if (IsModelBF16(subgraph) && subgraph.GetNodesInTopologicalOrder().size() > 1) {  // don't apply conversion when single cast node graph (unit test case)
+  } else if (IsModelBF16(subgraph)) {
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP bfloat16->float16 optimization pass is enabled";
     std::unique_ptr<onnxruntime::Model> model;
     Status status = bfloat16_fix::Transform(subgraph, logger, model);
diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
index de0e8a97fb6b0..df82119953f49 100644
--- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
+++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
@@ -958,7 +958,47 @@ Status Transform(const GraphViewer& src_graph_viewer,
 
 namespace bfloat16_fix {
 void replace_bf16_with_fp16(qdq_scales_fix::CustomGraph& gen_graph) {
+  auto& graph = gen_graph.original_graph;
+  std::unordered_set<NodeIndex> protected_nodes;
+
+  // To keep the BF16 output, insert a Cast node before it.
+  // Add the inserted Cast node and the Identity node to protected_nodes.
+  // The data flow becomes: Modified Internal Graph (FP16) -> Cast(to BF16) -> Identity -> Output(BF16).
+  for (const auto* output_arg : graph.GetOutputs()) {
+    if (output_arg->TypeAsProto() &&
+        output_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16) {
+      const Node* identity_node = graph.GetProducerNode(output_arg->Name());
+      if (!identity_node || identity_node->OpType() != "Identity") {
+        continue;
+      }
+      protected_nodes.insert(identity_node->Index());
+
+      // Create the new Cast node to keep bf16 output.
+      std::string cast_node_name = "InsertCastToBf16_" + output_arg->Name();
+      const NodeArg* cast_input_arg = identity_node->InputDefs()[0];
+      auto& cast_output_arg = graph.GetOrCreateNodeArg(cast_input_arg->Name() + "_bf16", output_arg->TypeAsProto());
+      InlinedVector<NodeArg*> cast_inputs = {const_cast<NodeArg*>(cast_input_arg)};
+      InlinedVector<NodeArg*> cast_outputs = {&cast_output_arg};
+      Node& cast_node = graph.AddNode(cast_node_name, "Cast", "Cast internal FP16 to BF16 output", cast_inputs, cast_outputs, nullptr, "");
+      cast_node.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16));
+      protected_nodes.insert(cast_node.Index());
+
+      // Reroute the graph edges.
+      auto edge_it = identity_node->InputEdgesBegin();
+      if (edge_it != identity_node->InputEdgesEnd()) {
+        const Node& producer_node = edge_it->GetNode();
+        int producer_arg_index = edge_it->GetSrcArgIndex();
+        graph.RemoveEdge(producer_node.Index(), identity_node->Index(), producer_arg_index, 0);
+        graph.AddEdge(producer_node.Index(), cast_node.Index(), producer_arg_index, 0);
+        graph.AddEdge(cast_node.Index(), identity_node->Index(), 0, 0);
+      }
+    }
+  }
+
   for (auto& const_node : gen_graph.original_graph.Nodes()) {
+    if (protected_nodes.count(const_node->Index())) {
+      continue;
+    }
     auto node = const_cast<ONNX_NAMESPACE::Node*>(const_node);
     if (node->OpType() == "Cast") {
       for (auto& [name, const_attribute] : node->GetAttributes()) {

From 415fdfeb10ce988496dd6677d8d3f13c38824992 Mon Sep 17 00:00:00 2001
From: "Peng, Bo" <bo.peng@intel.com>
Date: Mon, 3 Nov 2025 16:42:05 +0800
Subject: [PATCH 3/5] Revert the Cast node insertion; add a condition to
 disable the bfloat16 transform for OV CPU

---
 .../providers/openvino/backend_manager.cc     |  2 +-
 .../qdq_transformations/qdq_scales_fix.cpp    | 40 -------------------
 2 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 74999ab10a67d..741b76ce203c9 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -598,7 +598,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
-  } else if (IsModelBF16(subgraph)) {
+  } else if ((session_context_.device_type.find("CPU") == std::string::npos) && IsModelBF16(subgraph)) {
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP bfloat16->float16 optimization pass is enabled";
     std::unique_ptr<onnxruntime::Model> model;
     Status status = bfloat16_fix::Transform(subgraph, logger, model);
diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
index df82119953f49..de0e8a97fb6b0 100644
--- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
+++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
@@ -958,47 +958,7 @@ Status Transform(const GraphViewer& src_graph_viewer,
 
 namespace bfloat16_fix {
 void replace_bf16_with_fp16(qdq_scales_fix::CustomGraph& gen_graph) {
-  auto& graph = gen_graph.original_graph;
-  std::unordered_set<NodeIndex> protected_nodes;
-
-  // To keep the BF16 output, insert a Cast node before it.
-  // Add the inserted Cast node and the Identity node to protected_nodes.
-  // The data flow becomes: Modified Internal Graph (FP16) -> Cast(to BF16) -> Identity -> Output(BF16).
-  for (const auto* output_arg : graph.GetOutputs()) {
-    if (output_arg->TypeAsProto() &&
-        output_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16) {
-      const Node* identity_node = graph.GetProducerNode(output_arg->Name());
-      if (!identity_node || identity_node->OpType() != "Identity") {
-        continue;
-      }
-      protected_nodes.insert(identity_node->Index());
-
-      // Create the new Cast node to keep bf16 output.
-      std::string cast_node_name = "InsertCastToBf16_" + output_arg->Name();
-      const NodeArg* cast_input_arg = identity_node->InputDefs()[0];
-      auto& cast_output_arg = graph.GetOrCreateNodeArg(cast_input_arg->Name() + "_bf16", output_arg->TypeAsProto());
-      InlinedVector<NodeArg*> cast_inputs = {const_cast<NodeArg*>(cast_input_arg)};
-      InlinedVector<NodeArg*> cast_outputs = {&cast_output_arg};
-      Node& cast_node = graph.AddNode(cast_node_name, "Cast", "Cast internal FP16 to BF16 output", cast_inputs, cast_outputs, nullptr, "");
-      cast_node.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16));
-      protected_nodes.insert(cast_node.Index());
-
-      // Reroute the graph edges.
-      auto edge_it = identity_node->InputEdgesBegin();
-      if (edge_it != identity_node->InputEdgesEnd()) {
-        const Node& producer_node = edge_it->GetNode();
-        int producer_arg_index = edge_it->GetSrcArgIndex();
-        graph.RemoveEdge(producer_node.Index(), identity_node->Index(), producer_arg_index, 0);
-        graph.AddEdge(producer_node.Index(), cast_node.Index(), producer_arg_index, 0);
-        graph.AddEdge(cast_node.Index(), identity_node->Index(), 0, 0);
-      }
-    }
-  }
-
   for (auto& const_node : gen_graph.original_graph.Nodes()) {
-    if (protected_nodes.count(const_node->Index())) {
-      continue;
-    }
     auto node = const_cast<ONNX_NAMESPACE::Node*>(const_node);
     if (node->OpType() == "Cast") {
       for (auto& [name, const_attribute] : node->GetAttributes()) {

From a6f7cbb15f1a691b3ef9e7e805cc5fa8c79a3757 Mon Sep 17 00:00:00 2001
From: "Peng, Bo" <bo.peng@intel.com>
Date: Thu, 6 Nov 2025 19:25:52 +0800
Subject: [PATCH 4/5] Remove silent bfloat16 conversion

---
 .../providers/openvino/backend_manager.cc     | 22 --------
 .../qdq_transformations/qdq_scales_fix.cpp    | 55 -------------------
 .../qdq_transformations/qdq_scales_fix.h      |  5 --
 3 files changed, 82 deletions(-)

diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 741b76ce203c9..4a20847c0890c 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -389,18 +389,6 @@ static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
   return false;
 }
 
-static bool IsModelBF16(const onnxruntime::GraphViewer& graph_viewer) {
-  const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
-  for (std::size_t i = 0; i < node_indices.size(); i++) {
-    gsl::not_null<const onnxruntime::Node*> node(graph_viewer.GetNode(node_indices[i]));
-    for (auto& output : node->OutputDefs()) {
-      if (output->ToProto().type().tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16)
-        return true;
-    }
-  }
-  return false;
-}
-
 static bool Is16BitTensor(const onnxruntime::NodeArg* node_arg) {
   const auto* type_proto = node_arg ? node_arg->TypeAsProto() : nullptr;
   return type_proto && type_proto->has_tensor_type() &&
@@ -598,16 +586,6 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
-  } else if ((session_context_.device_type.find("CPU") == std::string::npos) && IsModelBF16(subgraph)) {
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP bfloat16->float16 optimization pass is enabled";
-    std::unique_ptr<onnxruntime::Model> model;
-    Status status = bfloat16_fix::Transform(subgraph, logger, model);
-    auto model_proto = model->ToProto();
-    model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
-    print_model_proto_duration();
-    DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
-    ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
-    return model_proto;
   } else {
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
 
diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
index de0e8a97fb6b0..161100f63fd26 100644
--- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
+++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
@@ -955,60 +955,5 @@ Status Transform(const GraphViewer& src_graph_viewer,
   return status;
 }
 }  // namespace qdq_scales_fix
-
-namespace bfloat16_fix {
-void replace_bf16_with_fp16(qdq_scales_fix::CustomGraph& gen_graph) {
-  for (auto& const_node : gen_graph.original_graph.Nodes()) {
-    auto node = const_cast<ONNX_NAMESPACE::Node*>(const_node);
-    if (node->OpType() == "Cast") {
-      for (auto& [name, const_attribute] : node->GetAttributes()) {
-        auto& attribute = const_cast<ONNX_NAMESPACE::AttributeProto&>(const_attribute);
-        if (name == "to" && attribute.type() == ONNX_NAMESPACE::AttributeProto_AttributeType_INT)
-          if (attribute.i() == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16)
-            attribute.set_i(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16);
-      }
-    }
-    for (auto& output : node->OutputDefs()) {
-      auto& output_proto = const_cast<ONNX_NAMESPACE::TypeProto&>(output->ToProto().type());
-      if (output_proto.mutable_tensor_type()->elem_type() == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16)
-        output_proto.mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16);
-    }
-  }
-
-  for (auto& node : gen_graph.original_graph.Nodes()) {
-    for (auto& input_def : node->InputDefs()) {
-      ORT_THROW_IF_ERROR(graph_utils::ConvertInMemoryDataToInline(gen_graph.original_graph, input_def->Name()));
-    }
-  }
-
-  const auto& init_set = gen_graph.original_graph.GetAllInitializedTensors();
-  for (auto& [key, const_tensor_proto] : init_set) {
-    auto tensor_proto = const_cast<ONNX_NAMESPACE::TensorProto*>(const_tensor_proto);
-    auto dt = tensor_proto->data_type();
-    if (dt == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16) {
-      auto raw_data = tensor_proto->has_raw_data() ? reinterpret_cast<std::uint16_t*>(tensor_proto->mutable_raw_data()->data()) : nullptr;
-      if (raw_data) {
-        tensor_proto->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16);
-        std::int64_t size = 1;
-        for (int i = 0; i < tensor_proto->dims_size(); ++i)
-          size *= tensor_proto->dims()[i];
-        for (std::int64_t i = 0; i < size; ++i) {
-          raw_data[i] = onnxruntime::MLFloat16(onnxruntime::BFloat16::FromBits(raw_data[i])).val;
-        }
-      }
-    }
-  }
-}
-
-Status Transform(const GraphViewer& src_graph_viewer,
-                 const logging::Logger& logger,
-                 /*out*/ std::unique_ptr<onnxruntime::Model>& model) {
-  auto status = qdq_scales_fix::copy_model(src_graph_viewer, logger, model);
-  auto g = qdq_scales_fix::generate_graph_from_onnx(model->MainGraph());
-
-  replace_bf16_with_fp16(g);
-  return status;
-}
-}  // namespace bfloat16_fix
 }  // namespace openvino_ep
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.h b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.h
index 2182850d96c43..c54c531e1bd40 100644
--- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.h
+++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.h
@@ -15,10 +15,5 @@ Status Transform(const GraphViewer& src_graph,
                  const logging::Logger& logger,
                  /*out*/ std::unique_ptr<onnxruntime::Model>& model);
 }
-namespace bfloat16_fix {
-Status Transform(const GraphViewer& src_graph,
-                 const logging::Logger& logger,
-                 /*out*/ std::unique_ptr<onnxruntime::Model>& model);
-}
 }  // namespace openvino_ep
 }  // namespace onnxruntime

From 46b1c78d1674330fbb1babdac8a9cb94f6bf547b Mon Sep 17 00:00:00 2001
From: "Peng, Bo" <bo.peng@intel.com>
Date: Fri, 7 Nov 2025 10:27:23 +0800
Subject: [PATCH 5/5] remove bf16 testing and cpu support for openvino

---
 .../openvino/ov_versions/data_ops.cc          |   4 +-
 .../qdq_transformations/qdq_scales_fix.cpp    |   1 -
 .../openvino_ep_bfloat16_pass_test.cc         | 116 ------------------
 3 files changed, 1 insertion(+), 120 deletions(-)
 delete mode 100644 onnxruntime/test/providers/openvino/openvino_ep_bfloat16_pass_test.cc

diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 037cb6a1270ea..4156b45cd638a 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -561,9 +561,7 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) {
   }
 
   auto dtype = type_proto->tensor_type().elem_type();
-  // Enable bfloat16 -> float16 on-the-fly conversion
-  if (dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BFLOAT16 ||
-      dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16 ||
+  if (dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16 ||
       dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16)
     return true;
   if (is_initializer) {
diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
index 161100f63fd26..a7b5c51882ff4 100644
--- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
+++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp
@@ -4,7 +4,6 @@
 #include "qdq_scales_fix.h"
 #include "core/providers/openvino/ov_protobuf_utils.h"
 #include "core/framework/ort_value.h"
-#include "core/common/float16.h"
 
 #include <fstream>
 #include <list>
diff --git a/onnxruntime/test/providers/openvino/openvino_ep_bfloat16_pass_test.cc b/onnxruntime/test/providers/openvino/openvino_ep_bfloat16_pass_test.cc
deleted file mode 100644
index 105a35011a78d..0000000000000
--- a/onnxruntime/test/providers/openvino/openvino_ep_bfloat16_pass_test.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#include <filesystem>
-#include <map>
-#include <string>
-
-#include "core/session/onnxruntime_cxx_api.h"
-#include "core/common/float16.h"
-
-#include "test/util/include/test/test_environment.h"
-#include "test/unittest_util/qdq_test_utils.h"
-
-#include "gtest/gtest.h"
-#include "gmock/gmock.h"
-
-using namespace ONNX_NAMESPACE;
-using namespace onnxruntime::logging;
-
-extern std::unique_ptr<Ort::Env> ort_env;
-
-class OVEP_BF16_Tests : public ::testing::TestWithParam<std::string> {};
-
-namespace detail {
-auto ConstructModel() {
-  using namespace onnxruntime;
-  using namespace test;
-
-  std::unordered_map<std::string, int> domain_to_version;
-  domain_to_version[kOnnxDomain] = 19;
-  Model model("Bfloat16Tester", true, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(),
-              domain_to_version, {}, DefaultLoggingManager().DefaultLogger());
-
-  Graph& graph = model.MainGraph();
-  ModelTestBuilder builder(graph);
-  auto dim = 4;
-  std::vector<float> input_data(dim, 1.0f);
-  auto* input = builder.MakeInput<float>({dim}, input_data);
-  builder.graph_.SetInputs({input});
-
-  auto* cast_to_bf16 = builder.MakeIntermediate();
-  Node& cast_node = builder.AddNode("Cast", {input}, {cast_to_bf16}, "");
-  cast_node.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16));
-
-  std::vector<onnxruntime::BFloat16> weight_data(dim * dim);
-  for (std::size_t i = 0; i < weight_data.size(); ++i)
-    weight_data[i] = onnxruntime::BFloat16(static_cast<float>(i % dim) / dim);
-  auto* weights = builder.MakeInitializer<onnxruntime::BFloat16>({dim, dim}, weight_data);
-
-  auto* matmul_out = builder.MakeIntermediate();
-  builder.AddNode("MatMul", {cast_to_bf16, weights}, {matmul_out});
-
-  std::vector<onnxruntime::BFloat16> weight_data_2(dim * dim);
-  for (std::size_t i = 0; i < weight_data_2.size(); ++i)
-    weight_data_2[i] = onnxruntime::BFloat16(static_cast<float>(i % dim) / dim);
-  auto* weights_2 = builder.MakeInitializer<onnxruntime::BFloat16>({dim, dim}, weight_data_2);
-
-  auto* matmul_out_2 = builder.MakeIntermediate();
-  builder.AddNode("MatMul", {matmul_out, weights_2}, {matmul_out_2});
-
-  auto* output = builder.MakeOutput();
-  Node& cast2_node = builder.AddNode("Cast", {matmul_out_2}, {output});
-  cast2_node.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT));
-
-  builder.SetGraphOutputs();
-  auto st = model.MainGraph().Resolve();
-  if (st != Status::OK())
-    throw std::runtime_error(st.ErrorMessage());
-  return model;
-}
-
-auto ProbeDevice(const std::string& device) {
-  static std::map<std::string, bool> is_present;
-  if (is_present.find(device) == is_present.end()) {
-    Ort::SessionOptions sessionOptions;
-    std::unordered_map<std::string, std::string> ov_options;
-    ov_options["device_type"] = device;
-    try {
-      sessionOptions.AppendExecutionProvider_OpenVINO_V2(ov_options);
-      is_present[device] = true;
-    } catch (...) {
-      is_present[device] = false;
-    }
-  }
-  return is_present[device];
-}
-}  // namespace detail
-
-namespace onnxruntime {
-namespace test {
-
-TEST_P(OVEP_BF16_Tests, TestModelConversion) {
-  Ort::SessionOptions sessionOptions;
-  std::unordered_map<std::string, std::string> ov_options;
-  const auto& device = GetParam();
-  if (!::detail::ProbeDevice(device))
-    GTEST_SKIP() << device + " is not available on this machine";
-
-  ov_options["device_type"] = device;
-  auto model = ::detail::ConstructModel();
-  sessionOptions.AppendExecutionProvider_OpenVINO_V2(ov_options);
-
-  std::string model_data;
-  model.ToProto().SerializeToString(&model_data);
-  auto model_data_span = AsByteSpan(model_data.data(), model_data.size());
-  try {
-    Ort::Session session(*ort_env, model_data_span.data(), model_data_span.size(), sessionOptions);
-  } catch (...) {
-    FAIL();
-  }
-}
-INSTANTIATE_TEST_SUITE_P(OVEP_Tests,
-                         OVEP_BF16_Tests,
-                         ::testing::Values("CPU", "GPU", "NPU"));
-}  // namespace test
-}  // namespace onnxruntime