From d062cf131d7ed8729b33d8eb40f4ff814f429ded Mon Sep 17 00:00:00 2001 From: "Klimenko, Mikhail" Date: Fri, 11 Jul 2025 15:57:47 +0200 Subject: [PATCH 1/6] Add on-the-fly bfloat16->float16 conversion pass --- .../providers/openvino/backend_manager.cc | 13 ++++- .../core/providers/openvino/contexts.h | 3 +- .../openvino/openvino_execution_provider.cc | 3 +- .../openvino/openvino_provider_factory.cc | 2 + .../openvino/ov_versions/capability.cc | 17 +++--- .../openvino/ov_versions/capability.h | 3 +- .../openvino/ov_versions/data_ops.cc | 3 + .../providers/openvino/ov_versions/data_ops.h | 7 ++- .../qdq_transformations/qdq_scales_fix.cpp | 55 ++++++++++++++++++- .../qdq_transformations/qdq_scales_fix.h | 5 ++ .../core/session/provider_bridge_ort.cc | 1 + .../python/onnxruntime_pybind_state.cc | 3 +- onnxruntime/test/perftest/ort_test_session.cc | 7 +++ 13 files changed, 104 insertions(+), 18 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 65532c31e14bd..567d05427a0df 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -453,7 +453,18 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); return model_proto; - } else { + } + else if (session_context_.enable_bfloat16_optimizer) { + std::unique_ptr model; + Status status = bfloat16_fix::Transform(subgraph, logger, model); + auto model_proto = model->ToProto(); + model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); + print_model_proto_duration(); + DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); + ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); + return model_proto; + } + else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 6a2b375d733f9..1ed89253f27ac 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -101,6 +101,7 @@ struct ProviderInfo { bool disable_dynamic_shapes{false}; // [disable_dynamic_shapes]: Rewrite dynamic shaped models to // static shape at runtime and execute. bool enable_qdq_optimizer{false}; // Enables QDQ pruning for efficient inference latency with NPU + bool enable_bfloat16_optimizer{false}; // Enables on-the-fly bfloat16->float16 conversion bool enable_causallm{false}; // Enables Causal LM Compilation for ORT GenAI OVEP Pass bool so_context_enable{false}; // ORT session option bool so_disable_cpu_ep_fallback{false}; // ORT session option @@ -110,7 +111,7 @@ struct ProviderInfo { const ConfigOptions* config_options{NULL}; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input"}; + "enable_bfloat16_optimizer", "enable_causallm", "disable_dynamic_shapes", "reshape_input"}; }; // Holds context applicable to the entire EP instance. 
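Usage sketch (illustrative, not part of the diffs): a caller could opt in to the new enable_bfloat16_optimizer key registered in contexts.h above through ORT's C++ API as shown below. AppendExecutionProvider_OpenVINO_V2 and the string-to-string options map are the same API the unit test added in patch 4/6 uses; the header path and device choice are assumptions. Note that patch 3/6 later removes this key and triggers the conversion automatically whenever bfloat16 tensors are detected.

#include <string>
#include <unordered_map>

#include "onnxruntime_cxx_api.h"  // "core/session/onnxruntime_cxx_api.h" inside the ORT source tree

int main() {
  Ort::SessionOptions session_options;

  // Keys must be listed in valid_provider_keys (contexts.h); all values are strings.
  std::unordered_map<std::string, std::string> ov_options;
  ov_options["device_type"] = "NPU";
  ov_options["enable_bfloat16_optimizer"] = "true";  // opt in to the bfloat16 -> float16 pass

  // Attach the OpenVINO EP with the options above.
  session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);

  // ... create an Ort::Env and Ort::Session with session_options and a bfloat16 model ...
  return 0;
}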
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index a0aa04293ac37..825a79520a5e1 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -81,7 +81,8 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, openvino_ep::GetCapability obj(ep_ctx_handle_, graph_viewer, session_context_.device_type, - session_context_.enable_qdq_optimizer); + session_context_.enable_qdq_optimizer, + session_context_.enable_bfloat16_optimizer); result = obj.Execute(); session_context_.is_wholly_supported_graph = obj.IsWhollySupportedGraph(); session_context_.has_external_weights = obj.HasExternalWeights(); diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index bad1d416eeda2..67dc1f1c56527 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -340,6 +340,8 @@ static void ParseProviderInfo(const ProviderOptions& provider_options, pi.enable_qdq_optimizer = ParseBooleanOption(provider_options, "enable_qdq_optimizer"); + pi.enable_bfloat16_optimizer = ParseBooleanOption(provider_options, "enable_bfloat16_optimizer"); + pi.enable_causallm = ParseBooleanOption(provider_options, "enable_causallm"); pi.disable_dynamic_shapes = ParseBooleanOption(provider_options, "disable_dynamic_shapes"); diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 88ddde8610c6e..4d844f233297f 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -30,9 +30,10 @@ namespace openvino_ep { GetCapability::GetCapability(const EPCtxHandler& ep_ctx_handler, const GraphViewer& graph_viewer_param, const std::string device_type_param, - const bool enable_qdq_optimizer) : ep_ctx_handler_(ep_ctx_handler), - graph_viewer_(graph_viewer_param), - device_type_(std::move(device_type_param)) { + const bool enable_qdq_optimizer, + bool enable_bfloat16_optimizer) : ep_ctx_handler_(ep_ctx_handler), + graph_viewer_(graph_viewer_param), + device_type_(std::move(device_type_param)) { bool npu_qdq_optimizer_enabled = false; if (device_type_.find("NPU") != std::string::npos) { device_type_ = "CPU"; @@ -42,15 +43,15 @@ GetCapability::GetCapability(const EPCtxHandler& ep_ctx_handler, } #if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5 - data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled); + data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 6 - data_ops_ = new DataOps(graph_viewer_, V_2024_6, device_type_, npu_qdq_optimizer_enabled); + data_ops_ = new DataOps(graph_viewer_, V_2024_6, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); #elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 0 - data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled); + data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); #elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 1 - 
data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled); + data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); #else - data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled); + data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); #endif } diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.h b/onnxruntime/core/providers/openvino/ov_versions/capability.h index 364e79a76f154..786768d0ba3c4 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.h +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.h @@ -24,7 +24,8 @@ class GetCapability { GetCapability(const EPCtxHandler& ep_ctx_handler, const GraphViewer& graph_viewer_param, const std::string device_type_param, - const bool enable_qdq_optimizer); + const bool enable_qdq_optimizer, + bool enable_bfloat16_optimizer); virtual std::vector> Execute(); bool IsWhollySupportedGraph() { return is_wholly_supported_graph_; diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 84001c1161efc..868b968fde7a1 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -617,6 +617,9 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) { // experimentally for GPU and qdq stripping mode allow int16 types if (npu_qdq_optimizer_enabled_ && (dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16 || dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16)) return true; + // Enable bfloat16 -> float16 on-the-fly conversion + if (bfloat16_optimizer_enabled_ && dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BFLOAT16) + return true; } #ifndef NDEBUG if (openvino_ep::backend_utils::IsDebugEnabled()) { diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h index cf7d834d6cfc7..4ee3d9736d29c 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h @@ -69,6 +69,7 @@ class DataOps { std::set supported_types_gpu_; std::set supported_types_initializer_; bool npu_qdq_optimizer_enabled_; + bool bfloat16_optimizer_enabled_; protected: void populate_op_mode_supported(); @@ -81,11 +82,13 @@ class DataOps { public: DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, - const std::string dev_id, const bool npu_qdq_optimizer_enabled) + const std::string dev_id, const bool npu_qdq_optimizer_enabled, + bool bfloat16_optimizer_enabled) : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(std::move(dev_id)), - npu_qdq_optimizer_enabled_(npu_qdq_optimizer_enabled) { + npu_qdq_optimizer_enabled_(npu_qdq_optimizer_enabled), + bfloat16_optimizer_enabled_(bfloat16_optimizer_enabled) { populate_op_mode_supported(); populate_types_supported(); } diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp index 571aa57c99f33..3a11168920c3f 100644 --- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp +++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp @@ -3,6 +3,7 
@@ #include "qdq_scales_fix.h" #include "core/providers/openvino/ov_protobuf_utils.h" +#include "core/framework/float16.h" #include #include @@ -605,8 +606,7 @@ float get_initializer_value(const Graph& graph, const std::string& initializer_n auto size = get_initializer_size(graph, initializer_name); ORT_ENFORCE(size == 1, "Expected an initializer to be of size 1"); return raw_data[0]; - } - else + } else return get_float_initializer_data(p_initializer); } @@ -775,7 +775,6 @@ bool scale_graph(CustomGraph& gen_graph, return needs_second_run; } - Status copy_model(const GraphViewer& src_graph_viewer, const logging::Logger& logger, std::unique_ptr& model) { model = src_graph_viewer.CreateModel(logger); @@ -942,5 +941,55 @@ Status Transform(const GraphViewer& src_graph_viewer, return status; } } // namespace qdq_scales_fix + +namespace bfloat16_fix { +void replace_bf16_with_fp16(qdq_scales_fix::CustomGraph& gen_graph) { + for (auto& const_node : gen_graph.original_graph.Nodes()) { + auto node = const_cast(const_node); + if (node->OpType() == "Cast") { + for (auto& [name, const_attribute] : node->GetAttributes()) { + auto& attribute = const_cast(const_attribute); + if (name == "to" && attribute.type() == ONNX_NAMESPACE::AttributeProto_AttributeType_INT) + if (attribute.i() == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16) + attribute.set_i(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16); + } + } + for (auto& output : node->OutputDefs()) { + auto& output_proto = const_cast(output->ToProto().type()); + if (output_proto.mutable_tensor_type()->elem_type() == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16) + output_proto.mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16); + } + } + + const auto& init_set = gen_graph.original_graph.GetAllInitializedTensors(); + for (auto& [key, const_tensor_proto] : init_set) { + auto tensor_proto = const_cast(const_tensor_proto); + auto dt = tensor_proto->data_type(); + if (dt == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16) { + auto raw_data = tensor_proto->has_raw_data() ? 
reinterpret_cast(tensor_proto->mutable_raw_data()->data()) : nullptr; + if (raw_data) { + tensor_proto->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16); + std::int64_t size = 1; + for (int i = 0; i < tensor_proto->dims_size(); ++i) + size *= tensor_proto->dims()[i]; + for (std::int64_t i = 0; i < size; ++i) { + std::uint32_t tmp = static_cast(raw_data[i]) << 16; + raw_data[i] = onnxruntime::MLFloat16(*reinterpret_cast(&tmp)).val; + } + } + } + } +} + +Status Transform(const GraphViewer& src_graph_viewer, + const logging::Logger& logger, + /*out*/ std::unique_ptr& model) { + auto status = qdq_scales_fix::copy_model(src_graph_viewer, logger, model); + auto g = qdq_scales_fix::generate_graph_from_onnx(model->MainGraph()); + + replace_bf16_with_fp16(g); + return status; +} +} // namespace bfloat16_fix } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.h b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.h index c54c531e1bd40..2182850d96c43 100644 --- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.h +++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.h @@ -15,5 +15,10 @@ Status Transform(const GraphViewer& src_graph, const logging::Logger& logger, /*out*/ std::unique_ptr& model); } +namespace bfloat16_fix { +Status Transform(const GraphViewer& src_graph, + const logging::Logger& logger, + /*out*/ std::unique_ptr& model); +} } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 3db35ae8769e0..3c3a1942a4cf1 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -2144,6 +2144,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O ov_options_converted_map["load_config"] = ""; ov_options_converted_map["model_priority"] = "DEFAULT"; ov_options_converted_map["enable_qdq_optimizer"] = "false"; + ov_options_converted_map["enable_bfloat16_optimizer"] = "false"; ov_options_converted_map["enable_causallm"] = "false"; return ov_options_converted_map; } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index bdc4f65e590d9..cad66e4fd3566 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1182,7 +1182,8 @@ static std::shared_ptr CreateExecutionProviderFactory #if defined(USE_OPENVINO) || defined(USE_OPENVINO_PROVIDER_INTERFACE) ProviderOptions OV_provider_options_map; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", - "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", + "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", + "enable_qdq_optimizer", "enable_bfloat16_optimizer", "enable_causallm", "disable_dynamic_shapes", "reshape_input"}; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 7a210ca8482a4..5f2ea1cf2e2d7 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -764,6 +764,13 @@ select from 'TF8', 'TF16', 
'UINT8', 'FLOAT', 'ITENSOR'. \n)"); } else { ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_qdq_optimizer' should be a boolean i.e. true or false. Default value is false.\n"); } + } else if (key == "enable_bfloat16_optimizer") { + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; + } else { + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_bfloat16_optimizer' should be a boolean i.e. true or false. Default value is false.\n"); + } } else if (key == "enable_causallm") { if (value == "true" || value == "True" || value == "false" || value == "False") { From 4a2c571859452dcea2f19d116a05ad541533c86f Mon Sep 17 00:00:00 2001 From: "Klimenko, Mikhail" Date: Mon, 14 Jul 2025 07:55:49 +0000 Subject: [PATCH 2/6] Fix undetected bfloat16 initializers --- onnxruntime/core/providers/openvino/backend_manager.cc | 1 + .../core/providers/openvino/ov_versions/data_ops.cc | 10 ++++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 567d05427a0df..8bc99a7e0907d 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -455,6 +455,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, return model_proto; } else if (session_context_.enable_bfloat16_optimizer) { + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP bfloat16->float16 optimization pass is enabled"; std::unique_ptr model; Status status = bfloat16_fix::Transform(subgraph, logger, model); auto model_proto = model->ToProto(); diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 868b968fde7a1..a8db03170723d 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -560,8 +560,11 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) { return false; } + auto dtype = type_proto->tensor_type().elem_type(); + // Enable bfloat16 -> float16 on-the-fly conversion + if (bfloat16_optimizer_enabled_ && dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BFLOAT16) + return true; if (is_initializer) { - auto dtype = type_proto->tensor_type().elem_type(); for (auto const& var : supported_types_initializer_) { if ((var.first <= version_id_) && (var.second == dtype)) { @@ -576,8 +579,6 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) { #endif return false; } else { - auto dtype = type_proto->tensor_type().elem_type(); - if (device_id_.find("HETERO") != std::string::npos || device_id_.find("MULTI") != std::string::npos || device_id_.find("AUTO") != std::string::npos) { for (auto const& var : supported_types_npu_) { @@ -617,9 +618,6 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) { // experimentally for GPU and qdq stripping mode allow int16 types if (npu_qdq_optimizer_enabled_ && (dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16 || dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16)) return true; - // Enable bfloat16 -> float16 on-the-fly conversion - if (bfloat16_optimizer_enabled_ && dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BFLOAT16) - return true; } #ifndef NDEBUG if 
(openvino_ep::backend_utils::IsDebugEnabled()) { From 601dd30a23346dd3da855241900bf4560fbadc41 Mon Sep 17 00:00:00 2001 From: "Klimenko, Mikhail" Date: Mon, 14 Jul 2025 11:54:07 +0000 Subject: [PATCH 3/6] Remove the option and make the logic implicit --- .../core/providers/openvino/backend_manager.cc | 18 ++++++++++++++---- onnxruntime/core/providers/openvino/contexts.h | 3 +-- .../openvino/openvino_execution_provider.cc | 3 +-- .../openvino/openvino_provider_factory.cc | 2 -- .../openvino/ov_versions/capability.cc | 17 ++++++++--------- .../openvino/ov_versions/capability.h | 3 +-- .../providers/openvino/ov_versions/data_ops.cc | 2 +- .../providers/openvino/ov_versions/data_ops.h | 7 ++----- .../core/session/provider_bridge_ort.cc | 1 - onnxruntime/python/onnxruntime_pybind_state.cc | 3 +-- onnxruntime/test/perftest/ort_test_session.cc | 7 ------- 11 files changed, 29 insertions(+), 37 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 8bc99a7e0907d..ceec01353d3f6 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -372,6 +372,18 @@ static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) { return false; } +static bool HasBf16(const onnxruntime::GraphViewer& graph_viewer) { + const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder(); + for (std::size_t i = 0; i < node_indices.size(); i++) { + gsl::not_null node(graph_viewer.GetNode(node_indices[i])); + for (auto& output : node->OutputDefs()) { + if (output->ToProto().type().tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16) + return true; + } + } + return false; +} + static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& onnx_model_path_name, [[maybe_unused]] ONNX_NAMESPACE::ModelProto* model_proto, [[maybe_unused]] const onnxruntime::Node& fused_node) { @@ -453,8 +465,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); return model_proto; - } - else if (session_context_.enable_bfloat16_optimizer) { + } else if (HasBf16(subgraph)) { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP bfloat16->float16 optimization pass is enabled"; std::unique_ptr model; Status status = bfloat16_fix::Transform(subgraph, logger, model); @@ -464,8 +475,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); return model_proto; - } - else { + } else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 1ed89253f27ac..6a2b375d733f9 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -101,7 +101,6 @@ struct ProviderInfo { bool disable_dynamic_shapes{false}; // [disable_dynamic_shapes]: Rewrite dynamic shaped models to // static shape at runtime and execute. 
bool enable_qdq_optimizer{false}; // Enables QDQ pruning for efficient inference latency with NPU - bool enable_bfloat16_optimizer{false}; // Enables on-the-fly bfloat16->float16 conversion bool enable_causallm{false}; // Enables Causal LM Compilation for ORT GenAI OVEP Pass bool so_context_enable{false}; // ORT session option bool so_disable_cpu_ep_fallback{false}; // ORT session option @@ -111,7 +110,7 @@ struct ProviderInfo { const ConfigOptions* config_options{NULL}; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_bfloat16_optimizer", "enable_causallm", "disable_dynamic_shapes", "reshape_input"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input"}; }; // Holds context applicable to the entire EP instance. diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 825a79520a5e1..a0aa04293ac37 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -81,8 +81,7 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, openvino_ep::GetCapability obj(ep_ctx_handle_, graph_viewer, session_context_.device_type, - session_context_.enable_qdq_optimizer, - session_context_.enable_bfloat16_optimizer); + session_context_.enable_qdq_optimizer); result = obj.Execute(); session_context_.is_wholly_supported_graph = obj.IsWhollySupportedGraph(); session_context_.has_external_weights = obj.HasExternalWeights(); diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 67dc1f1c56527..bad1d416eeda2 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -340,8 +340,6 @@ static void ParseProviderInfo(const ProviderOptions& provider_options, pi.enable_qdq_optimizer = ParseBooleanOption(provider_options, "enable_qdq_optimizer"); - pi.enable_bfloat16_optimizer = ParseBooleanOption(provider_options, "enable_bfloat16_optimizer"); - pi.enable_causallm = ParseBooleanOption(provider_options, "enable_causallm"); pi.disable_dynamic_shapes = ParseBooleanOption(provider_options, "disable_dynamic_shapes"); diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 4d844f233297f..88ddde8610c6e 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -30,10 +30,9 @@ namespace openvino_ep { GetCapability::GetCapability(const EPCtxHandler& ep_ctx_handler, const GraphViewer& graph_viewer_param, const std::string device_type_param, - const bool enable_qdq_optimizer, - bool enable_bfloat16_optimizer) : ep_ctx_handler_(ep_ctx_handler), - graph_viewer_(graph_viewer_param), - device_type_(std::move(device_type_param)) { + const bool enable_qdq_optimizer) : ep_ctx_handler_(ep_ctx_handler), + graph_viewer_(graph_viewer_param), + device_type_(std::move(device_type_param)) { bool npu_qdq_optimizer_enabled = false; if (device_type_.find("NPU") != std::string::npos) { device_type_ = "CPU"; @@ -43,15 +42,15 @@ GetCapability::GetCapability(const 
EPCtxHandler& ep_ctx_handler, } #if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5 - data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); + data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled); #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 6 - data_ops_ = new DataOps(graph_viewer_, V_2024_6, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); + data_ops_ = new DataOps(graph_viewer_, V_2024_6, device_type_, npu_qdq_optimizer_enabled); #elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 0 - data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); + data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled); #elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 1 - data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); + data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled); #else - data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled, enable_bfloat16_optimizer); + data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled); #endif } diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.h b/onnxruntime/core/providers/openvino/ov_versions/capability.h index 786768d0ba3c4..364e79a76f154 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.h +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.h @@ -24,8 +24,7 @@ class GetCapability { GetCapability(const EPCtxHandler& ep_ctx_handler, const GraphViewer& graph_viewer_param, const std::string device_type_param, - const bool enable_qdq_optimizer, - bool enable_bfloat16_optimizer); + const bool enable_qdq_optimizer); virtual std::vector> Execute(); bool IsWhollySupportedGraph() { return is_wholly_supported_graph_; diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index a8db03170723d..8f271a825711e 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -562,7 +562,7 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) { auto dtype = type_proto->tensor_type().elem_type(); // Enable bfloat16 -> float16 on-the-fly conversion - if (bfloat16_optimizer_enabled_ && dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BFLOAT16) + if (dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BFLOAT16) return true; if (is_initializer) { for (auto const& var : supported_types_initializer_) { diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h index 4ee3d9736d29c..cf7d834d6cfc7 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h @@ -69,7 +69,6 @@ class DataOps { std::set supported_types_gpu_; std::set supported_types_initializer_; bool npu_qdq_optimizer_enabled_; - bool bfloat16_optimizer_enabled_; protected: void populate_op_mode_supported(); @@ -82,13 +81,11 @@ class DataOps { public: DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, - const std::string dev_id, const bool 
npu_qdq_optimizer_enabled, - bool bfloat16_optimizer_enabled) + const std::string dev_id, const bool npu_qdq_optimizer_enabled) : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(std::move(dev_id)), - npu_qdq_optimizer_enabled_(npu_qdq_optimizer_enabled), - bfloat16_optimizer_enabled_(bfloat16_optimizer_enabled) { + npu_qdq_optimizer_enabled_(npu_qdq_optimizer_enabled) { populate_op_mode_supported(); populate_types_supported(); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 3c3a1942a4cf1..3db35ae8769e0 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -2144,7 +2144,6 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O ov_options_converted_map["load_config"] = ""; ov_options_converted_map["model_priority"] = "DEFAULT"; ov_options_converted_map["enable_qdq_optimizer"] = "false"; - ov_options_converted_map["enable_bfloat16_optimizer"] = "false"; ov_options_converted_map["enable_causallm"] = "false"; return ov_options_converted_map; } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index cad66e4fd3566..bdc4f65e590d9 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1182,8 +1182,7 @@ static std::shared_ptr CreateExecutionProviderFactory #if defined(USE_OPENVINO) || defined(USE_OPENVINO_PROVIDER_INTERFACE) ProviderOptions OV_provider_options_map; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", - "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", - "enable_qdq_optimizer", "enable_bfloat16_optimizer", + "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", "enable_causallm", "disable_dynamic_shapes", "reshape_input"}; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 5f2ea1cf2e2d7..7a210ca8482a4 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -764,13 +764,6 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); } else { ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_qdq_optimizer' should be a boolean i.e. true or false. Default value is false.\n"); } - } else if (key == "enable_bfloat16_optimizer") { - if (value == "true" || value == "True" || - value == "false" || value == "False") { - ov_options[key] = value; - } else { - ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_bfloat16_optimizer' should be a boolean i.e. true or false. 
Default value is false.\n"); - } } else if (key == "enable_causallm") { if (value == "true" || value == "True" || value == "false" || value == "False") { From c594c4da48d339c8a8567b3370a9f9bfd96ea606 Mon Sep 17 00:00:00 2001 From: "Klimenko, Mikhail" Date: Tue, 29 Jul 2025 10:34:19 +0000 Subject: [PATCH 4/6] Add tests --- .../openvino_ep_bfloat16_pass_test.cc | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 onnxruntime/test/providers/openvino/openvino_ep_bfloat16_pass_test.cc diff --git a/onnxruntime/test/providers/openvino/openvino_ep_bfloat16_pass_test.cc b/onnxruntime/test/providers/openvino/openvino_ep_bfloat16_pass_test.cc new file mode 100644 index 0000000000000..fc90563a61bb1 --- /dev/null +++ b/onnxruntime/test/providers/openvino/openvino_ep_bfloat16_pass_test.cc @@ -0,0 +1,116 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include +#include +#include + +#include "core/session/onnxruntime_cxx_api.h" +#include "core/framework/float16.h" + +#include "test/util/include/test/test_environment.h" +#include "test/optimizer/qdq_test_utils.h" + +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +using namespace ONNX_NAMESPACE; +using namespace onnxruntime::logging; + +extern std::unique_ptr ort_env; + +class OVEP_BF16_Tests : public ::testing::TestWithParam {}; + +namespace detail { +auto ConstructModel() { + using namespace onnxruntime; + using namespace test; + + std::unordered_map domain_to_version; + domain_to_version[kOnnxDomain] = 19; + Model model("Bfloat16Tester", true, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), + domain_to_version, {}, DefaultLoggingManager().DefaultLogger()); + + Graph& graph = model.MainGraph(); + ModelTestBuilder builder(graph); + auto dim = 4; + std::vector input_data(dim, 1.0f); + auto* input = builder.MakeInput({dim}, input_data); + builder.graph_.SetInputs({input}); + + auto* cast_to_bf16 = builder.MakeIntermediate(); + Node& cast_node = builder.AddNode("Cast", {input}, {cast_to_bf16}, ""); + cast_node.AddAttribute("to", static_cast(ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16)); + + std::vector weight_data(dim * dim); + for (std::size_t i = 0; i < weight_data.size(); ++i) + weight_data[i] = onnxruntime::BFloat16(static_cast(i % dim) / dim); + auto* weights = builder.MakeInitializer({dim, dim}, weight_data); + + auto* matmul_out = builder.MakeIntermediate(); + builder.AddNode("MatMul", {cast_to_bf16, weights}, {matmul_out}); + + std::vector weight_data_2(dim * dim); + for (std::size_t i = 0; i < weight_data_2.size(); ++i) + weight_data_2[i] = onnxruntime::BFloat16(static_cast(i % dim) / dim); + auto* weights_2 = builder.MakeInitializer({dim, dim}, weight_data_2); + + auto* matmul_out_2 = builder.MakeIntermediate(); + builder.AddNode("MatMul", {matmul_out, weights_2}, {matmul_out_2}); + + auto* output = builder.MakeOutput(); + Node& cast2_node = builder.AddNode("Cast", {matmul_out_2}, {output}); + cast2_node.AddAttribute("to", static_cast(ONNX_NAMESPACE::TensorProto_DataType_FLOAT)); + + builder.SetGraphOutputs(); + auto st = model.MainGraph().Resolve(); + if (st != Status::OK()) + throw std::runtime_error(st.ErrorMessage()); + return model; +} + +auto ProbeDevice(const std::string& device) { + static std::map is_present; + if (is_present.find(device) == is_present.end()) { + Ort::SessionOptions sessionOptions; + std::unordered_map ov_options; + ov_options["device_type"] = device; + try { + 
sessionOptions.AppendExecutionProvider_OpenVINO_V2(ov_options); + is_present[device] = true; + } catch (...) { + is_present[device] = false; + } + } + return is_present[device]; +} +} // namespace detail + +namespace onnxruntime { +namespace test { + +TEST_P(OVEP_BF16_Tests, TestModelConversion) { + Ort::SessionOptions sessionOptions; + std::unordered_map ov_options; + const auto& device = GetParam(); + if (!::detail::ProbeDevice(device)) + GTEST_SKIP() << device + " is not available on this machine"; + + ov_options["device_type"] = device; + auto model = ::detail::ConstructModel(); + sessionOptions.AppendExecutionProvider_OpenVINO_V2(ov_options); + + std::string model_data; + model.ToProto().SerializeToString(&model_data); + auto model_data_span = AsByteSpan(model_data.data(), model_data.size()); + try { + Ort::Session session(*ort_env, model_data_span.data(), model_data_span.size(), sessionOptions); + } catch (...) { + FAIL(); + } +} +INSTANTIATE_TEST_SUITE_P(OVEP_Tests, + OVEP_BF16_Tests, + ::testing::Values("CPU", "GPU", "NPU")); +} // namespace test +} // namespace onnxruntime From 58e12daf8eed18b9adfb1ec058ac89393098af62 Mon Sep 17 00:00:00 2001 From: "Klimenko, Mikhail" Date: Wed, 30 Jul 2025 14:12:12 +0000 Subject: [PATCH 5/6] Rename detection function --- onnxruntime/core/providers/openvino/backend_manager.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index a18f9fc7a6530..cadeab4cbd4cc 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -375,7 +375,7 @@ static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) { return false; } -static bool HasBf16(const onnxruntime::GraphViewer& graph_viewer) { +static bool IsModelBF16(const onnxruntime::GraphViewer& graph_viewer) { const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder(); for (std::size_t i = 0; i < node_indices.size(); i++) { gsl::not_null node(graph_viewer.GetNode(node_indices[i])); @@ -468,7 +468,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); return model_proto; - } else if (HasBf16(subgraph)) { + } else if (IsModelBF16(subgraph)) { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP bfloat16->float16 optimization pass is enabled"; std::unique_ptr model; Status status = bfloat16_fix::Transform(subgraph, logger, model); From d846db049ebb49d93d7a45815bf0351656b953e7 Mon Sep 17 00:00:00 2001 From: "Klimenko, Mikhail" Date: Wed, 30 Jul 2025 14:44:47 +0000 Subject: [PATCH 6/6] Fix CI for strict aliasing rules --- .../providers/openvino/qdq_transformations/qdq_scales_fix.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp index 11137449cc9d4..f1ce230387565 100644 --- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp +++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_scales_fix.cpp @@ -973,8 +973,7 @@ void replace_bf16_with_fp16(qdq_scales_fix::CustomGraph& gen_graph) { for (int i = 0; i < tensor_proto->dims_size(); ++i) size *= tensor_proto->dims()[i]; for (std::int64_t i = 0; i < size; ++i) { - std::uint32_t tmp = 
static_cast<std::uint32_t>(raw_data[i]) << 16; - raw_data[i] = onnxruntime::MLFloat16(*reinterpret_cast<float*>(&tmp)).val; + raw_data[i] = onnxruntime::MLFloat16(onnxruntime::BFloat16::FromBits(raw_data[i])).val; } } }
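For reference, the numeric transformation behind this pass, and the strict-aliasing concern that patch 6/6 fixes, can be reproduced in standalone C++: a bfloat16 value is exactly the upper 16 bits of an IEEE-754 binary32, so widening it is a 16-bit left shift, after which the float is narrowed to float16. The sketch below is illustrative only; it uses std::memcpy instead of the original reinterpret_cast (the same aliasing trap BFloat16::FromBits sidesteps inside ORT), and its float -> fp16 helper is deliberately simplified (truncating rounding, subnormals flushed to zero, NaN mapped to infinity), unlike ORT's MLFloat16 constructor.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Widen a bfloat16 bit pattern to float: bf16 is the top 16 bits of an IEEE-754 binary32.
static float bf16_bits_to_float(std::uint16_t b) {
  std::uint32_t u = static_cast<std::uint32_t>(b) << 16;
  float f;
  std::memcpy(&f, &u, sizeof(f));  // memcpy avoids the strict-aliasing issue of reinterpret_cast
  return f;
}

// Simplified float -> float16 bit conversion: truncates the mantissa, flushes subnormals
// to zero, and maps out-of-range/NaN inputs to infinity. Enough to illustrate the idea.
static std::uint16_t float_to_fp16_bits(float f) {
  std::uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  std::uint32_t sign = (u >> 16) & 0x8000u;
  std::int32_t exp = static_cast<std::int32_t>((u >> 23) & 0xFFu) - 127 + 15;
  std::uint32_t mant = (u >> 13) & 0x3FFu;
  if (exp <= 0) return static_cast<std::uint16_t>(sign);             // underflow -> signed zero
  if (exp >= 31) return static_cast<std::uint16_t>(sign | 0x7C00u);  // overflow/NaN -> infinity
  return static_cast<std::uint16_t>(sign | (static_cast<std::uint32_t>(exp) << 10) | mant);
}

int main() {
  const std::uint16_t bf16_bits = 0x3FC0;  // 1.5 in bfloat16
  const float widened = bf16_bits_to_float(bf16_bits);
  std::printf("bf16 0x%04X -> float %g -> fp16 bits 0x%04X\n",
              static_cast<unsigned>(bf16_bits), widened,
              static_cast<unsigned>(float_to_fp16_bits(widened)));  // expects fp16 0x3E00
  return 0;
}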