diff --git a/SYCL/AtomicRef/assignment_atomic64.cpp b/SYCL/AtomicRef/assignment_atomic64.cpp
index f1ef951b0d..1ed7262749 100644
--- a/SYCL/AtomicRef/assignment_atomic64.cpp
+++ b/SYCL/AtomicRef/assignment_atomic64.cpp
@@ -16,7 +16,9 @@ int main() {
   }
 
   constexpr int N = 32;
+#ifdef ENABLE_FP64
   assignment_test<double>(q, N);
+#endif
 
   // Include long tests if they are 64 bits wide
   if constexpr (sizeof(long) == 8) {
diff --git a/SYCL/AtomicRef/assignment_atomic64_aspect_fp64.cpp b/SYCL/AtomicRef/assignment_atomic64_aspect_fp64.cpp
new file mode 100644
index 0000000000..0dce0ba117
--- /dev/null
+++ b/SYCL/AtomicRef/assignment_atomic64_aspect_fp64.cpp
@@ -0,0 +1,16 @@
+// Enable FP64 part of <assignment_atomic64.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// XFAIL: hip
+// Expected failure because hip does not have atomic64 check implementation
+
+#include "assignment_atomic64.cpp"
diff --git a/SYCL/AtomicRef/assignment_atomic64_generic.cpp b/SYCL/AtomicRef/assignment_atomic64_generic.cpp
index 3bc735c2c1..8e18d356cc 100644
--- a/SYCL/AtomicRef/assignment_atomic64_generic.cpp
+++ b/SYCL/AtomicRef/assignment_atomic64_generic.cpp
@@ -19,8 +19,9 @@ int main() {
   }
 
   constexpr int N = 32;
+#ifdef ENABLE_FP64
   assignment_generic_test<double>(q, N);
-
+#endif
   // Include long tests if they are 64 bits wide
   if constexpr (sizeof(long) == 8) {
     assignment_generic_test<long>(q, N);
@@ -37,6 +38,5 @@ int main() {
   if constexpr (sizeof(char *) == 8) {
     assignment_generic_test<char *>(q, N);
   }
-
   std::cout << "Test passed." << std::endl;
 }
diff --git a/SYCL/AtomicRef/assignment_atomic64_generic_aspect_fp64.cpp b/SYCL/AtomicRef/assignment_atomic64_generic_aspect_fp64.cpp
new file mode 100644
index 0000000000..fb5feaace7
--- /dev/null
+++ b/SYCL/AtomicRef/assignment_atomic64_generic_aspect_fp64.cpp
@@ -0,0 +1,16 @@
+// Enable FP64 part of <assignment_atomic64_generic.cpp>. To be removed once
+// DPC++ supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// CUDA backend has had no support for the generic address space yet
+// XFAIL: cuda || hip
+
+#include "assignment_atomic64_generic.cpp"
diff --git a/SYCL/Basic/buffer/buffer.cpp b/SYCL/Basic/buffer/buffer.cpp
index 5ced4c9a04..913fe015c1 100644
--- a/SYCL/Basic/buffer/buffer.cpp
+++ b/SYCL/Basic/buffer/buffer.cpp
@@ -14,10 +14,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <sycl/sycl.hpp>
+
 #include <cassert>
-#include <iostream>
 #include <memory>
-#include <sycl/sycl.hpp>
 
 using namespace sycl;
 
@@ -513,15 +513,22 @@ int main() {
                                     [](bool *data) { delete[] data; });
     std::shared_ptr<int> int_shrd(new int[size],
                                   [](int *data) { delete[] data; });
+#ifdef ENABLE_FP64
     std::shared_ptr<double> double_shrd(new double[size],
                                         [](double *data) { delete[] data; });
+#endif
 
     std::vector<bool> bool_vector;
     std::vector<int> int_vector;
+#ifdef ENABLE_FP64
     std::vector<double> double_vector;
+#endif
+
     bool_vector.reserve(size);
     int_vector.reserve(size);
+#ifdef ENABLE_FP64
     double_vector.reserve(size);
+#endif
 
     sycl::queue Queue;
     std::mutex m;
@@ -532,43 +539,58 @@ int main() {
       sycl::buffer<int, dims> buf_int_shrd(
           int_shrd, r,
           sycl::property_list{sycl::property::buffer::use_mutex(m)});
+#ifdef ENABLE_FP64
       sycl::buffer<double, dims> buf_double_shrd(
           double_shrd, r,
           sycl::property_list{sycl::property::buffer::use_mutex(m)});
+#endif
       m.lock();
       std::fill(bool_shrd.get(), (bool_shrd.get() + size), bool());
       std::fill(int_shrd.get(), (int_shrd.get() + size), int());
+#ifdef ENABLE_FP64
       std::fill(double_shrd.get(), (double_shrd.get() + size), double());
+#endif
       m.unlock();
 
       buf_bool_shrd.set_final_data(bool_vector.begin());
       buf_int_shrd.set_final_data(int_vector.begin());
+#ifdef ENABLE_FP64
       buf_double_shrd.set_final_data(double_vector.begin());
+#endif
+
       buf_bool_shrd.set_write_back(true);
       buf_int_shrd.set_write_back(true);
+#ifdef ENABLE_FP64
       buf_double_shrd.set_write_back(true);
+#endif
 
       Queue.submit([&](sycl::handler &cgh) {
         auto Accessor_bool =
             buf_bool_shrd.get_access<sycl::access::mode::write>(cgh);
         auto Accessor_int =
             buf_int_shrd.get_access<sycl::access::mode::write>(cgh);
+#ifdef ENABLE_FP64
         auto Accessor_double =
             buf_double_shrd.get_access<sycl::access::mode::write>(cgh);
+#endif
         cgh.parallel_for<class FillBuffer>(r, [=](sycl::id<1> WIid) {
           Accessor_bool[WIid] = true;
           Accessor_int[WIid] = 3;
+#ifdef ENABLE_FP64
           Accessor_double[WIid] = 7.5;
+#endif
         });
       });
     } // Data is copied back
 
     for (size_t i = 0; i < size; i++) {
-      if (bool_vector[i] != true || int_vector[i] != 3 ||
-          double_vector[i] != 7.5) {
-        assert(false && "Data was not copied back");
-        return 1;
-      }
+      bool Passed = true;
+      Passed &= (bool_vector[i] == true);
+      Passed &= (int_vector[i] == 3);
+#ifdef ENABLE_FP64
+      Passed &= (double_vector[i] == 7.5);
+#endif
+      assert(Passed && "Data was not copied back");
     }
   }
 
diff --git a/SYCL/Basic/buffer/buffer_aspect_fp64.cpp b/SYCL/Basic/buffer/buffer_aspect_fp64.cpp
new file mode 100644
index 0000000000..c4232caf5a
--- /dev/null
+++ b/SYCL/Basic/buffer/buffer_aspect_fp64.cpp
@@ -0,0 +1,23 @@
+// Enable FP64 part of <buffer.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx %cxx_std_optionc++17 -DENABLE_FP64 %s -o %t1.out %sycl_options
+// RUN: %HOST_RUN_PLACEHOLDER %t1.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t2.out
+// RUN: %HOST_RUN_PLACEHOLDER %t2.out
+// RUN: %CPU_RUN_PLACEHOLDER %t2.out
+// RUN: %GPU_RUN_PLACEHOLDER %t2.out
+// RUN: %ACC_RUN_PLACEHOLDER %t2.out
+
+//==------------------- buffer.cpp - SYCL buffer basic test ----------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "buffer.cpp"
diff --git a/SYCL/DeviceLib/built-ins/nan.cpp b/SYCL/DeviceLib/built-ins/nan.cpp
index caea23f560..5a07762f5f 100644
--- a/SYCL/DeviceLib/built-ins/nan.cpp
+++ b/SYCL/DeviceLib/built-ins/nan.cpp
@@ -41,13 +41,14 @@ template <typename T, typename R> void check_nan(s::queue &Queue) {
 int main() {
   test_nan_call<s::ushort, s::half>();
   test_nan_call<s::uint, float>();
-  test_nan_call<s::ulong, double>();
-  test_nan_call<s::ulonglong, double>();
   test_nan_call<s::ushort2, s::half2>();
   test_nan_call<s::uint2, s::float2>();
+#ifdef ENABLE_FP64
+  test_nan_call<s::ulong, double>();
+  test_nan_call<s::ulonglong, double>();
   test_nan_call<s::ulong2, s::double2>();
   test_nan_call<s::ulonglong2, s::double2>();
-
+#endif
   s::queue Queue([](sycl::exception_list ExceptionList) {
     for (std::exception_ptr ExceptionPtr : ExceptionList) {
       try {
@@ -63,10 +64,13 @@ int main() {
   if (Queue.get_device().has(sycl::aspect::fp16))
     check_nan<unsigned short, s::half>(Queue);
 #endif
+
   check_nan<unsigned int, float>(Queue);
+#ifdef ENABLE_FP64
   if (Queue.get_device().has(sycl::aspect::fp64)) {
     check_nan<unsigned long, double>(Queue);
     check_nan<unsigned long long, double>(Queue);
   }
+#endif
   return 0;
 }
diff --git a/SYCL/DeviceLib/built-ins/nan_aspect_fp64.cpp b/SYCL/DeviceLib/built-ins/nan_aspect_fp64.cpp
new file mode 100644
index 0000000000..684e568924
--- /dev/null
+++ b/SYCL/DeviceLib/built-ins/nan_aspect_fp64.cpp
@@ -0,0 +1,13 @@
+// Enable FP64 part of <nan.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+#include "nan.cpp"
diff --git a/SYCL/ESIMD/aot_mixed.cpp b/SYCL/ESIMD/aot_mixed.cpp
index 0f237a558a..366abf19d8 100644
--- a/SYCL/ESIMD/aot_mixed.cpp
+++ b/SYCL/ESIMD/aot_mixed.cpp
@@ -8,9 +8,9 @@
 // REQUIRES: gpu
 // UNSUPPORTED: cuda || hip
 // UNSUPPORTED: esimd_emulator
-// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" -o %t.sycl.out -DENABLE_SYCL=0 %s
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device gen12lp" -o %t.sycl.out -DENABLE_SYCL=0 %s
 // RUN: %GPU_RUN_PLACEHOLDER %t.sycl.out
-// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" -o %t.out %s
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device gen12lp" -o %t.out %s
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 
 // This test checks the following ESIMD ahead-of-time compilation scenarios:
diff --git a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp
index da908391f8..c145ecc3f1 100644
--- a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp
+++ b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp
@@ -265,12 +265,17 @@ int main(void) {
   auto arith_ops = esimd_test::ArithBinaryOpsNoDiv;
   passed &= test<unsigned char, int, 1, BinOp, VSf, IDf>(arith_ops, q);
   passed &= test<char, float, 7, BinOp, VEf, IDf>(arith_ops, q, 0.000001f);
+#ifdef ENABLE_FP64
   passed &= test<short, double, 7, BinOp, VEf, IDf>(arith_ops, q, 1e-15);
+#endif
   passed &= test<float, float, 32, BinOp, VEf, IDf>(arith_ops, q, 0.000001f);
   passed &= test<half, char, 1, BinOp, verify_n, IDf>(arith_ops, q, 1);
   passed &= test<half, unsigned int, 32, BinOp, VSf, IDf>(arith_ops, q, 1);
-  passed &= test<double, half, 7, BinOp, VSf, IDf>(arith_ops, q);
   passed &= test<short, uint64_t, 7, BinOp, VSf, IDf>(arith_ops, q);
+#ifdef ENABLE_FP64
+  passed &= test<double, half, 7, BinOp, VSf, IDf>(arith_ops, q);
+  passed &= test<short, double, 7, BinOp, VSf, IDf>(arith_ops, q);
+#endif
 #ifdef USE_BF16
   passed &= test<bfloat16, int, 8, BinOp, VSf, IDf>(arith_ops, q);
   passed &= test<half, bfloat16, 7, BinOp, VEfa, IDf>(arith_ops, q, 0.03);
@@ -326,12 +331,17 @@ int main(void) {
   auto cmp_ops = esimd_test::CmpOps;
   passed &= test<unsigned char, int, 1, CmpOp, VSf, IDf>(cmp_ops, q);
   passed &= test<char, float, 7, CmpOp, VSf, IDf>(cmp_ops, q);
-  passed &= test<short, double, 7, CmpOp, VSf, IDf>(cmp_ops, q);
+#ifdef ENABLE_FP64
+  passed &= test<short, double, 7, BinOp, VEf, IDf>(arith_ops, q, 1e-15);
+#endif
   passed &= test<float, float, 32, CmpOp, VSf, IDf>(cmp_ops, q);
   passed &= test<half, char, 1, CmpOp, VSf, IDf>(cmp_ops, q, 1);
   passed &= test<half, unsigned int, 32, CmpOp, VSf, IDf>(cmp_ops, q, 1);
-  passed &= test<double, half, 7, CmpOp, VSf, IDf>(cmp_ops, q);
   passed &= test<short, uint64_t, 7, CmpOp, VSf, IDf>(cmp_ops, q);
+#ifdef ENABLE_FP64
+  passed &= test<double, half, 7, CmpOp, VSf, IDf>(cmp_ops, q);
+  passed &= test<short, double, 7, CmpOp, VSf, IDf>(cmp_ops, q);
+#endif
 #ifdef USE_BF16
   passed &= test<bfloat16, int, 32, CmpOp, VSf, IDf>(cmp_ops, q);
   passed &= test<half, bfloat16, 7, CmpOp, VSf, IDf>(cmp_ops, q);
diff --git a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy_aspect_fp64.cpp b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy_aspect_fp64.cpp
new file mode 100644
index 0000000000..d9d2b256bb
--- /dev/null
+++ b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy_aspect_fp64.cpp
@@ -0,0 +1,31 @@
+//==-- bin_un_cmp_ops_heavy_aspect-fp64.cpp  - DPC++ ESIMD on-device test --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Enable FP64 part of <bin_and_cmp_ops_heavy.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64, gpu
+// UNSUPPORTED: cuda || hip
+// TODO: esimd_emulator fails due to unimplemented 'half' type
+// XFAIL: esimd_emulator
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// Tests various binary operations applied to simd objects.
+
+// TODO
+// Arithmetic operations behaviour depends on Gen's control regiter's rounding
+// mode, which is RTNE by default:
+//    cr0.5:4 is 00b = Round to Nearest or Even (RTNE)
+// For half this leads to divergence between Gen and host (emulated) results
+// larger than certain threshold. Might need to tune the cr0 once this feature
+// is available in ESIMD.
+//
+
+#include "bin_and_cmp_ops_heavy.cpp"
diff --git a/SYCL/ESIMD/api/functional/ctors/ctor_converting_fp_extra.cpp b/SYCL/ESIMD/api/functional/ctors/ctor_converting_fp_extra.cpp
index 39f9be9fcd..3cd6e5c556 100644
--- a/SYCL/ESIMD/api/functional/ctors/ctor_converting_fp_extra.cpp
+++ b/SYCL/ESIMD/api/functional/ctors/ctor_converting_fp_extra.cpp
@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu, level_zero
+// REQUIRES: gpu, level_zero, aspect-fp64
 // XREQUIRES: gpu
 // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet.
 // The current "REQUIRES" should be replaced with "gpu" only as mentioned in
diff --git a/SYCL/ESIMD/api/functional/ctors/ctor_load_acc_fp_extra.cpp b/SYCL/ESIMD/api/functional/ctors/ctor_load_acc_fp_extra.cpp
index 797ff009c8..4cf2aa7add 100644
--- a/SYCL/ESIMD/api/functional/ctors/ctor_load_acc_fp_extra.cpp
+++ b/SYCL/ESIMD/api/functional/ctors/ctor_load_acc_fp_extra.cpp
@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu, level_zero
+// REQUIRES: gpu, level_zero, aspect-fp64
 // XREQUIRES: gpu
 // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet.
 // The current "REQUIRES" should be replaced with "gpu" only as mentioned in
diff --git a/SYCL/ESIMD/api/functional/ctors/ctor_load_usm_fp_extra.cpp b/SYCL/ESIMD/api/functional/ctors/ctor_load_usm_fp_extra.cpp
index 4b960879c1..4e8bb1414f 100644
--- a/SYCL/ESIMD/api/functional/ctors/ctor_load_usm_fp_extra.cpp
+++ b/SYCL/ESIMD/api/functional/ctors/ctor_load_usm_fp_extra.cpp
@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu, level_zero
+// REQUIRES: gpu, level_zero, aspect-fp64
 // XREQUIRES: gpu
 // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet.
 // The current "REQUIRES" should be replaced with "gpu" only as mentioned in
diff --git a/SYCL/ESIMD/api/functional/functions/functions_select_2d.hpp b/SYCL/ESIMD/api/functional/functions/functions_select_2d.hpp
index cc19d2cca7..84fac0b20b 100644
--- a/SYCL/ESIMD/api/functional/functions/functions_select_2d.hpp
+++ b/SYCL/ESIMD/api/functional/functions/functions_select_2d.hpp
@@ -224,8 +224,10 @@ bool run_test_for_types(sycl::queue &queue) {
   const auto great_size = get_dimensions<desired_simd_large_size>();
 #ifdef SIMD_RUN_TEST_WITH_SYCL_HALF_TYPE
   const auto all_types = get_tested_types<TestedTypes>();
-#else
+#elif SIMD_RUN_TEST_WITH_SYCL_DOUBLE_TYPE
   const auto all_types = named_type_pack<double>::generate("double");
+#else
+  const auto all_types = named_type_pack<float>::generate("float");
 #endif
 
   // Verify correctness for different select sizes.
diff --git a/SYCL/ESIMD/api/functional/functions/functions_select_2d_core_aspect_fp64.cpp b/SYCL/ESIMD/api/functional/functions/functions_select_2d_core_aspect_fp64.cpp
new file mode 100644
index 0000000000..73071ff8ca
--- /dev/null
+++ b/SYCL/ESIMD/api/functional/functions/functions_select_2d_core_aspect_fp64.cpp
@@ -0,0 +1,41 @@
+//==- functions_select_2d_core_aspect_fp64.cpp - DPC++ ESIMD on-device test ==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// REQUIRES: gpu, level_zero, aspect-fp64
+// XREQUIRES: gpu
+// TODO gpu and level_zero in REQUIRES due to only this platforms supported yet.
+// The current "REQUIRES" should be replaced with "gpu" only as mentioned in
+// "XREQUIRES".
+// UNSUPPORTED: cuda, hip
+// RUN: %clangxx -fsycl -DSIMD_RUN_TEST_WITH_SYCL_DOUBLE_TYPE %s -fsycl-device-code-split=per_kernel -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+//
+// Test for simd select for 2d function.
+// The test creates source simd instance with reference data and invokes logical
+// not operator, using core data types.
+// The test verifies that selected values can be changed with avoid to change
+// values, that hasn't beed selected.
+
+#include "functions_select_2d.hpp"
+
+using namespace sycl::ext::intel::experimental::esimd;
+using namespace esimd_test::api::functional;
+
+int main(int, char **) {
+  sycl::queue queue(esimd_test::ESIMDSelector{},
+                    esimd_test::createExceptionHandler());
+
+  if (!queue.get_device().has(sycl::aspect::fp64) {
+    std::cout << "Skipping test\n";
+    return 0;
+  }
+
+  bool passed = functions::run_test_for_types<tested_types::core>(queue);
+
+  std::cout << (passed ? "=== Test passed\n" : "=== Test FAILED\n");
+  return passed ? 0 : 1;
+}
diff --git a/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_accuracy_fp_extra.cpp b/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_accuracy_fp_extra.cpp
index 5e38a3b7cf..bba6deaa45 100644
--- a/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_accuracy_fp_extra.cpp
+++ b/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_accuracy_fp_extra.cpp
@@ -6,7 +6,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu, level_zero
+// REQUIRES: gpu, level_zero, aspect-fp64
 // XREQUIRES: gpu
 // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet.
 // The current "REQUIRES" should be replaced with "gpu" only as mentioned in
diff --git a/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_fp_extra.cpp b/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_fp_extra.cpp
index 7f88d0ad96..87355f845d 100644
--- a/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_fp_extra.cpp
+++ b/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_fp_extra.cpp
@@ -6,7 +6,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu, level_zero
+// REQUIRES: gpu, level_zero, aspect-fp64
 // XREQUIRES: gpu
 // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet.
 // The current "REQUIRES" should be replaced with "gpu" only as mentioned in
diff --git a/SYCL/ESIMD/api/replicate_smoke.cpp b/SYCL/ESIMD/api/replicate_smoke.cpp
index c9985edc93..194c5c227a 100644
--- a/SYCL/ESIMD/api/replicate_smoke.cpp
+++ b/SYCL/ESIMD/api/replicate_smoke.cpp
@@ -186,7 +186,9 @@ int main(int argc, char **argv) {
   passed &= test<int>(q);
   passed &= test<uint64_t>(q);
   passed &= test<float>(q);
+#ifdef ENABLE_FP64
   passed &= test<double>(q);
+#endif
 
   std::cout << (passed ? "Test passed\n" : "Test FAILED\n");
   return passed ? 0 : 1;
diff --git a/SYCL/ESIMD/api/replicate_smoke_aspect_fp64.cpp b/SYCL/ESIMD/api/replicate_smoke_aspect_fp64.cpp
new file mode 100644
index 0000000000..ad587c4df8
--- /dev/null
+++ b/SYCL/ESIMD/api/replicate_smoke_aspect_fp64.cpp
@@ -0,0 +1,19 @@
+//==------- replicate_smoke.cpp  - DPC++ ESIMD on-device test --------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Enable FP64 part of <replicate_smoke.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: gpu, aspect-fp64
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+//
+#include "replicate_smoke.cpp"
diff --git a/SYCL/ESIMD/api/saturation_smoke.cpp b/SYCL/ESIMD/api/saturation_smoke.cpp
index e6aa049160..789224963f 100644
--- a/SYCL/ESIMD/api/saturation_smoke.cpp
+++ b/SYCL/ESIMD/api/saturation_smoke.cpp
@@ -188,7 +188,9 @@ int main(int argc, char **argv) {
   passed &= test<half, int, FpToInt>(q);
   passed &= test<half, unsigned char, FpToInt>(q);
   passed &= test<float, int, FpToInt>(q);
+#ifdef ENABLE_FP64
   passed &= test<double, short, FpToInt>(q);
+#endif
 
   passed &= test<unsigned char, char, UIntToSameOrNarrowAnyInt>(q);
   passed &= test<unsigned short, short, UIntToSameOrNarrowAnyInt>(q);
@@ -205,7 +207,9 @@ int main(int argc, char **argv) {
 
   passed &= test<float, float, FpToFp>(q);
   passed &= test<half, half, FpToFp>(q);
+#ifdef ENABLE_FP64
   passed &= test<double, double, FpToFp>(q);
+#endif
 
   std::cout << (passed ? "Test passed\n" : "Test FAILED\n");
   return passed ? 0 : 1;
diff --git a/SYCL/ESIMD/api/saturation_smoke_aspect_fp64.cpp b/SYCL/ESIMD/api/saturation_smoke_aspect_fp64.cpp
new file mode 100644
index 0000000000..c1c0180911
--- /dev/null
+++ b/SYCL/ESIMD/api/saturation_smoke_aspect_fp64.cpp
@@ -0,0 +1,22 @@
+//==---- saturation_smoke_aspect_fp64.cpp - DPC++ ESIMD on-device test ----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Enable FP64 part of <saturation_smoke.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: gpu, aspect-fp64
+// UNSUPPORTED: cuda || hip
+// TODO: esimd_emulator fails due to unimplemented 'half' type
+// XFAIL: esimd_emulator
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+//
+// The test checks main functionality of esimd::saturate function.
+
+#include "saturation_smoke.cpp"
diff --git a/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp b/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp
index c397945845..7be06cdd51 100644
--- a/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp
+++ b/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp
@@ -23,7 +23,9 @@ int main(int argc, char **argv) {
   bool passed = true;
   passed &= test<half>(q);
   passed &= test<float>(q);
+#ifdef ENABLE_FP64
   passed &= test<double>(q);
+#endif
 
   std::cout << (passed ? "=== Test passed\n" : "=== Test FAILED\n");
   return passed ? 0 : 1;
diff --git a/SYCL/ESIMD/api/simd_view_select_2d_fp_aspect_fp64.cpp b/SYCL/ESIMD/api/simd_view_select_2d_fp_aspect_fp64.cpp
new file mode 100644
index 0000000000..5b46c63cdf
--- /dev/null
+++ b/SYCL/ESIMD/api/simd_view_select_2d_fp_aspect_fp64.cpp
@@ -0,0 +1,23 @@
+//==- simd_view_select_2d_fp_aspect_fp64.cpp  - DPC++ ESIMD on-device test -==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Enable FP64 part of <simd_view_select_2d.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: gpu, aspect-fp64
+// UNSUPPORTED: cuda || hip
+// TODO: esimd_emulator fails due to unimplemented 'single_task()' method
+// XFAIL: esimd_emulator
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -fsycl-device-code-split=per_kernel -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+//
+// Smoke test for 2D region select API which can be used to represent 2D tiles.
+// Tests FP types.
+
+#include "simd_view_select_2d.cpp"
diff --git a/SYCL/ESIMD/api/unary_ops_heavy.cpp b/SYCL/ESIMD/api/unary_ops_heavy.cpp
index bc86616e61..8797f345ad 100644
--- a/SYCL/ESIMD/api/unary_ops_heavy.cpp
+++ b/SYCL/ESIMD/api/unary_ops_heavy.cpp
@@ -173,16 +173,20 @@ int main(void) {
   passed &= test<half, 1>(mod_ops, q);
   passed &= test<half, 32>(mod_ops, q);
   passed &= test<float, 32>(mod_ops, q);
+#ifdef ENABLE_FP64
   passed &= test<double, 7>(mod_ops, q);
+#endif
 
-  auto signed_ops = esimd_test::OpSeq<UnOp, UnOp::minus, UnOp::plus>{};
-  passed &= test<char, 7>(signed_ops, q);
-  passed &= test<short, 7>(signed_ops, q);
-  passed &= test<int, 16>(signed_ops, q);
-  passed &= test<int64_t, 16>(signed_ops, q);
-  passed &= test<half, 16>(signed_ops, q);
-  passed &= test<float, 16>(signed_ops, q);
-  passed &= test<double, 16>(signed_ops, q);
+  auto singed_ops = esimd_test::OpSeq<UnOp, UnOp::minus, UnOp::plus>{};
+  passed &= test<char, 7>(singed_ops, q);
+  passed &= test<short, 7>(singed_ops, q);
+  passed &= test<int, 16>(singed_ops, q);
+  passed &= test<int64_t, 16>(singed_ops, q);
+  passed &= test<half, 16>(singed_ops, q);
+  passed &= test<float, 16>(singed_ops, q);
+#ifdef ENABLE_FP64
+  passed &= test<double, 16>(singed_ops, q);
+#endif
 
 #ifdef USE_BF16
   // TODO: the rest unary operations are not yet supported for bfloat16 on host.
diff --git a/SYCL/ESIMD/api/unary_ops_heavy_aspect_fp64.cpp b/SYCL/ESIMD/api/unary_ops_heavy_aspect_fp64.cpp
new file mode 100644
index 0000000000..dbf71de392
--- /dev/null
+++ b/SYCL/ESIMD/api/unary_ops_heavy_aspect_fp64.cpp
@@ -0,0 +1,20 @@
+//==------ unary_ops_heavy_aspect_fp64.cpp  - DPC++ ESIMD on-device test ---==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Enable FP64 part of <unary_ops_heavy.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: gpu, aspect-fp64
+// UNSUPPORTED: cuda || hip
+// TODO: esimd_emulator fails due to unimplemented 'half' type
+// XFAIL: esimd_emulator
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+#include "unary_ops_heavy.cpp"
diff --git a/SYCL/ESIMD/ext_math.cpp b/SYCL/ESIMD/ext_math.cpp
index c515cbd29a..e53a534f88 100644
--- a/SYCL/ESIMD/ext_math.cpp
+++ b/SYCL/ESIMD/ext_math.cpp
@@ -467,17 +467,23 @@ int main(void) {
   Pass &= testESIMD<half, 8>(Q);
   Pass &= testESIMD<float, 16>(Q);
   Pass &= testESIMD<float, 32>(Q);
+  Pass &= testSYCL<float, 8>(Q);
+  Pass &= testSYCL<float, 32>(Q);
+  Pass &= testESIMDPow<float, 8>(Q);
+  Pass &= testESIMDPow<half, 32>(Q);
   if (Q.get_backend() != sycl::backend::ext_intel_esimd_emulator) {
     // ESIMD_EMULATOR supports only ESIMD kernels
     Pass &= testSYCL<float, 8>(Q);
     Pass &= testSYCL<float, 32>(Q);
   }
   Pass &= testESIMDSqrtIEEE<float, 16>(Q);
+#ifdef ENABLE_FP64
   Pass &= testESIMDSqrtIEEE<double, 32>(Q);
+#endif
   Pass &= testESIMDDivIEEE<float, 8>(Q);
+#ifdef ENABLE_FP64
   Pass &= testESIMDDivIEEE<double, 32>(Q);
-  Pass &= testESIMDPow<float, 8>(Q);
-  Pass &= testESIMDPow<half, 32>(Q);
+#endif
   std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");
   return Pass ? 0 : 1;
 }
diff --git a/SYCL/ESIMD/ext_math_aspect_fp64.cpp b/SYCL/ESIMD/ext_math_aspect_fp64.cpp
new file mode 100644
index 0000000000..e519e916ea
--- /dev/null
+++ b/SYCL/ESIMD/ext_math_aspect_fp64.cpp
@@ -0,0 +1,20 @@
+//==----- ext_math_aspect_fp64.cpp  - DPC++ ESIMD extended math test -------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Enable FP64 part of <ext_math.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: gpu, aspect-fp64
+// UNSUPPORTED: cuda || hip
+// TODO: esimd_emulator fails due to unimplemented 'half' type
+// XFAIL: esimd_emulator
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+#include "ext_math.cpp"
diff --git a/SYCL/ESIMD/regression/Inputs/dgetrf.hpp b/SYCL/ESIMD/regression/Inputs/dgetrf.hpp
index 6b099b38b0..2ccf5f5356 100644
--- a/SYCL/ESIMD/regression/Inputs/dgetrf.hpp
+++ b/SYCL/ESIMD/regression/Inputs/dgetrf.hpp
@@ -21,7 +21,6 @@
 #define ABS(x) ((x) >= 0 ? (x) : -(x))
 #define MIN(x, y) ((x) <= (y) ? (x) : (y))
 #define MAX(x, y) ((x) >= (y) ? (x) : (y))
-#define FP_RAND ((double)rand() / (double)RAND_MAX)
 
 #define OUTN(text, ...) fprintf(stderr, text, ##__VA_ARGS__)
 #define OUT(text, ...) OUTN(text "\n", ##__VA_ARGS__)
@@ -47,15 +46,16 @@ using namespace sycl;
 using namespace std;
 using namespace sycl::ext::intel::esimd;
 
-ESIMD_PRIVATE ESIMD_REGISTER(192) simd<double, 3 * 32 * 4> GRF;
+template <typename T> ESIMD_PRIVATE ESIMD_REGISTER(192) simd<T, 3 * 32 * 4> GRF;
 
 #define V(x, w, i) (x).template select<w, 1>(i)
 #define V1(x, i) V(x, 1, i)
 #define V8(x, i) V(x, 8, i)
 #define BCAST8(x, i) (x).template replicate_w<8, 1>(i)
 
-template <int M, int N, int K> ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
-  auto a = V(GRF, M * N, 0);
+template <typename T, int M, int N, int K>
+ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
+  auto a = V(GRF<T>, M * N, 0);
 
   if (K % 8) {
     simd_mask<8> mask = 1;
@@ -69,7 +69,7 @@ template <int M, int N, int K> ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
       V1(mask, k) = 0;
       if (ak0[k] != 0.0) {
         // scal
-        double temp = 1.0 / ak0[k];
+        T temp = 1.0 / ak0[k];
         ak0.merge(ak0 * temp, mask);
         for (int i = 8 + K & (-8); i < M; i += 8) {
           V8(ak, i) *= temp;
@@ -98,7 +98,7 @@ template <int M, int N, int K> ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
         V1(mask, k) = 0;
         if (ak0[k] != 0.0) {
           // scal
-          double temp = 1.0 / ak0[k];
+          T temp = 1.0 / ak0[k];
           ak0.merge(ak0 * temp, mask);
           for (int i = 16 + (K & (-8)) + kk; i < M; i += 8) {
             V8(ak, i) *= temp;
@@ -129,7 +129,7 @@ template <int M, int N, int K> ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
         V1(mask, k) = 0;
         if (ak0[k] != 0.0) {
           // scal
-          double temp = 1.0 / ak0[k];
+          T temp = 1.0 / ak0[k];
           ak0.merge(ak0 * temp, mask);
           for (int i = 8 + K + kk; i < M; i += 8) {
             V8(ak, i) *= temp;
@@ -159,16 +159,16 @@ template <int M, int N, int K> ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
 // into GRF K - an update rank P0=A[0:M,0:K] = column(F=A[0:K,0:K],
 // L=A[K:M,0:K]) - panel to update with P1=A[0:M,K:K+N] = column(U=A[0:K,K:K+N],
 // T=A[K:M,K:K+N]) - panel to be updated
-template <int M, int N, int K>
-ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) {
-  auto p1 = V(GRF, M * N, 0);
-  double *a1;
+template <typename T, int M, int N, int K>
+ESIMD_INLINE void dgetrfnp_left_step(T *a, int64_t lda, int64_t *info) {
+  auto p1 = V(GRF<T>, M * N, 0);
+  T *a1;
   int i, j, k;
 
   // load P1
   for (j = 0, a1 = a + K * lda; j < N; j++, a1 += lda)
     for (i = 0; i < M; i += 8) {
-      simd<double, 8> data;
+      simd<T, 8> data;
       data.copy_from(a1 + i);
       V8(p1, j * M + i) = data;
     }
@@ -178,10 +178,10 @@ ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) {
     // (gemm) update T=T-L*U
     for (int kk = 0; kk < K; kk += 8) {
       simd_mask<8> mask = 1;
-      simd<double, 8> a0k, aik;
+      simd<T, 8> a0k, aik;
       for (k = 0; k < 8 && kk + k < K; k++) {
         V1(mask, k) = 0;
-        simd<double, 8> data;
+        simd<T, 8> data;
         data.copy_from(a + kk + (kk + k) * lda);
         V8(a0k, 0) = data;
         for (j = 0; j < N; j++) {
@@ -193,7 +193,7 @@ ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) {
       }
       for (k = 0; k < 8 && kk + k < K; k++) {
         for (i = kk + 8; i < M; i += 8) {
-          simd<double, 8> data;
+          simd<T, 8> data;
           data.copy_from(a + i + (kk + k) * lda);
           V8(aik, 0) = data;
           for (j = 0; j < N; j++) {
@@ -207,24 +207,25 @@ ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) {
     }
   }
   // (getrf) factorize T=P*L*U
-  dgetrfnp_panel<M, N, K>(info);
+  dgetrfnp_panel<T, M, N, K>(info);
 
   // store P1
   for (j = 0, a1 = a + K * lda; j < N; j++, a1 += lda)
     for (i = 0; i < M; i += 8) {
-      simd<double, 8> vals = V8(p1, j * M + i);
+      simd<T, 8> vals = V8(p1, j * M + i);
       vals.copy_to(a1 + i);
     }
 }
 #endif // !USE_REF
 
-ESIMD_INLINE void dgetrfnp_esimd(int64_t m, int64_t n, double *a, int64_t lda,
+template <typename T>
+ESIMD_INLINE void dgetrfnp_esimd(int64_t m, int64_t n, T *a, int64_t lda,
                                  int64_t *ipiv, int64_t *info) {
   *info = 0;
 #if defined(USE_REF)
   int i, j, k;
   for (k = 0; k < MIN(m, n); k++) {
-    double temp = a[k + k * lda];
+    T temp = a[k + k * lda];
     if (!(*info) && temp == 0.0)
       *info = k + 1;
     // scal
@@ -243,62 +244,63 @@ ESIMD_INLINE void dgetrfnp_esimd(int64_t m, int64_t n, double *a, int64_t lda,
 #else  // defined(USE_REF)
   if (m == 8) {
     if (n == 8)
-      dgetrfnp_left_step<8, 8, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 8, 8, 0>(a, lda, info);
   } else if (m == 16) {
     if (n == 8)
-      dgetrfnp_left_step<16, 8, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 16, 8, 0>(a, lda, info);
     else if (n == 16)
-      dgetrfnp_left_step<16, 16, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 16, 16, 0>(a, lda, info);
   } else if (m == 32) {
     if (n == 8)
-      dgetrfnp_left_step<32, 8, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 0>(a, lda, info);
     else if (n == 12)
-      dgetrfnp_left_step<32, 12, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 12, 0>(a, lda, info);
     else if (n == 16) {
-      dgetrfnp_left_step<32, 8, 0>(a, lda, info);
-      dgetrfnp_left_step<32, 8, 8>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 8>(a, lda, info);
     } else if (n == 24) {
-      dgetrfnp_left_step<32, 8, 0>(a, lda, info);
-      dgetrfnp_left_step<32, 8, 8>(a, lda, info);
-      dgetrfnp_left_step<32, 8, 16>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 8>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 16>(a, lda, info);
     } else if (n == 32) {
-      dgetrfnp_left_step<32, 8, 0>(a, lda, info);
-      dgetrfnp_left_step<32, 8, 8>(a, lda, info);
-      dgetrfnp_left_step<32, 8, 16>(a, lda, info);
-      dgetrfnp_left_step<32, 8, 24>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 8>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 16>(a, lda, info);
+      dgetrfnp_left_step<T, 32, 8, 24>(a, lda, info);
     }
   } else if (m == 64) {
     if (n == 6)
-      dgetrfnp_left_step<64, 6, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 0>(a, lda, info);
     else if (n == 16) {
-      dgetrfnp_left_step<64, 6, 0>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 6>(a, lda, info);
-      dgetrfnp_left_step<64, 4, 12>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 6>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 4, 12>(a, lda, info);
     } else if (n == 32) {
-      dgetrfnp_left_step<64, 6, 0>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 6>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 12>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 18>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 24>(a, lda, info);
-      dgetrfnp_left_step<64, 2, 30>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 6>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 12>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 18>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 24>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 2, 30>(a, lda, info);
     } else if (n == 64) {
-      dgetrfnp_left_step<64, 6, 0>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 6>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 12>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 18>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 24>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 30>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 36>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 42>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 48>(a, lda, info);
-      dgetrfnp_left_step<64, 6, 54>(a, lda, info);
-      dgetrfnp_left_step<64, 4, 60>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 0>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 6>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 12>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 18>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 24>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 30>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 36>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 42>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 48>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 6, 54>(a, lda, info);
+      dgetrfnp_left_step<T, 64, 4, 60>(a, lda, info);
     }
   }
 #endif // defined(USE_REF)
 }
 
-void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
+template <typename T>
+void dgetrfnp_batch_strided_c(int64_t m, int64_t n, T *a, int64_t lda,
                               int64_t stride_a, int64_t *ipiv,
                               int64_t stride_ipiv, int64_t batch,
                               int64_t *info) {
@@ -309,11 +311,11 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
 
   CHECK(status = device.is_gpu(), !status);
 
-  double *a_gpu;
+  T *a_gpu;
   int64_t *ipiv_gpu;
   int64_t *info_gpu;
-  CHECK(a_gpu = static_cast<double *>(
-            malloc_shared(stride_a * batch * sizeof(double), device, context)),
+  CHECK(a_gpu = static_cast<T *>(
+            malloc_shared(stride_a * batch * sizeof(T), device, context)),
         !a_gpu);
   CHECK(ipiv_gpu = static_cast<int64_t *>(malloc_shared(
             stride_ipiv * batch * sizeof(int64_t), device, context)),
@@ -322,7 +324,7 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
             malloc_shared(batch * sizeof(int64_t), device, context)),
         !info_gpu);
 
-  memcpy(a_gpu, a, stride_a * batch * sizeof(double));
+  memcpy(a_gpu, a, stride_a * batch * sizeof(T));
 
   sycl::nd_range<1> range(sycl::range<1>{static_cast<size_t>(batch)},
                           sycl::range<1>{1});
@@ -344,7 +346,7 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
     return;
   }
 
-  memcpy(a, a_gpu, stride_a * batch * sizeof(double));
+  memcpy(a, a_gpu, stride_a * batch * sizeof(T));
   memcpy(ipiv, ipiv_gpu, stride_ipiv * batch * sizeof(int64_t));
   memcpy(info, info_gpu, batch * sizeof(int64_t));
 
@@ -353,14 +355,16 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
   free(info_gpu, context);
 }
 
-static void fp_init(int64_t m, int64_t n, double *a, int64_t lda) {
+template <typename T>
+static void fp_init(int64_t m, int64_t n, T *a, int64_t lda) {
   int64_t i, j;
   for (j = 0; j < n; j++)
     for (i = 0; i < m; i++)
-      a[i + j * lda] = 2.0 * FP_RAND - 1.0;
+      a[i + j * lda] = 2.0 * ((T)rand() / (T)RAND_MAX) - 1.0;
 }
 
-static void fp_copy(int64_t m, int64_t n, double *a, int64_t lda, double *b,
+template <typename T>
+static void fp_copy(int64_t m, int64_t n, T *a, int64_t lda, T *b,
                     int64_t ldb) {
   int64_t i, j;
   for (j = 0; j < n; j++)
@@ -368,8 +372,9 @@ static void fp_copy(int64_t m, int64_t n, double *a, int64_t lda, double *b,
       b[i + j * ldb] = a[i + j * lda];
 }
 
-static double fp_norm1(int64_t m, int64_t n, double *a, int64_t lda) {
-  double sum, value = 0.0;
+template <typename T>
+static T fp_norm1(int64_t m, int64_t n, T *a, int64_t lda) {
+  T sum, value = 0.0;
   int64_t i, j;
   for (j = 0; j < n; j++) {
     sum = 0.0;
@@ -381,28 +386,35 @@ static double fp_norm1(int64_t m, int64_t n, double *a, int64_t lda) {
   return value;
 }
 
-static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in,
-                                        double *a, int64_t lda,
-                                        int64_t stride_a, int64_t *ipiv,
-                                        int64_t stride_ipiv, int64_t batch,
-                                        int64_t *info) {
-  double thresh = 30.0;
+template <typename T>
+static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, T *a_in, T *a,
+                                        int64_t lda, int64_t stride_a,
+                                        int64_t *ipiv, int64_t stride_ipiv,
+                                        int64_t batch, int64_t *info) {
+  T thresh = 30.0;
   int fail = 0;
   int64_t i, j, k, l;
   char label[1024];
-  unsigned char prec_b[] = {0, 0, 0, 0, 0, 0, 0xb0, 0x3c};
-  double res = 0.0, nrm = 0.0, ulp = *(double *)prec_b;
-  double *w = (double *)malloc(sizeof(double) * MAX(m * n, 1));
+  unsigned char prec_b1[] = {0, 0, 0xb0, 0x3c};
+  unsigned char prec_b2[] = {0, 0, 0, 0, 0, 0, 0xb0, 0x3c};
+  T res = 0.0, nrm = 0.0, ulp;
+  if (std::is_same<T, double>::value) {
+    ulp = *(T *)prec_b2;
+  } else {
+    ulp = *(T *)prec_b1;
+  }
+
+  T *w = (T *)malloc(sizeof(T) * MAX(m * n, 1));
 
   sprintf(label, "m=%ld, n=%ld, lda=%ld, batch=%ld", m, n, lda, batch);
 
   for (k = 0; k < batch; k++) {
     /* info == 0 */
-    CHECK_AND_REPORT("info == 0", label, info[k] != 0, (double)info[k], fail);
+    CHECK_AND_REPORT("info == 0", label, info[k] != 0, (T)info[k], fail);
 
     if (m > 0 && n > 0) {
       /* | L U - A | / ( |A| n ulp ) */
-      memset(w, 0, sizeof(double) * m * n);
+      memset(w, 0, sizeof(T) * m * n);
       if (m < n) {
         for (j = 0; j < n; j++)
           for (i = 0; i <= j; i++)
@@ -429,7 +441,7 @@ static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in,
           w[i + j * m] -= a_in[k * stride_a + i + j * lda];
       res = fp_norm1(m, n, w, m);
       nrm = fp_norm1(m, n, &a_in[k * stride_a], lda);
-      nrm *= (double)n * ulp;
+      nrm *= (T)n * ulp;
       res /= nrm > 0.0 ? nrm : ulp;
       CHECK_AND_REPORT("| L U - A | / ( |A| n ulp )", label,
                        FAILED(res, thresh), res, fail);
@@ -440,12 +452,7 @@ static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in,
   return fail;
 }
 
-void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
-                              int64_t stride_a, int64_t *ipiv,
-                              int64_t stride_ipiv, int64_t batch,
-                              int64_t *info);
-
-int main(int argc, char *argv[]) {
+template <typename T> int check(int argc, char *argv[]) {
   int exit_status = 0;
   int64_t m = 64, n = 64, lda = 64;
   int64_t stride_a = lda * n, stride_ipiv = n;
@@ -458,10 +465,10 @@ int main(int argc, char *argv[]) {
     int64_t a_count = MAX(stride_a * batch, 1);
     int64_t ipiv_count = MAX(stride_ipiv * batch, 1);
     int64_t info_count = MAX(batch, 1);
-    double *a = NULL, *a_copy = NULL;
+    T *a = NULL, *a_copy = NULL;
     int64_t *ipiv = NULL, *info = NULL;
-    CHECK(a = (double *)malloc(sizeof(double) * a_count), !a);
-    CHECK(a_copy = (double *)malloc(sizeof(double) * a_count), !a_copy);
+    CHECK(a = (T *)malloc(sizeof(T) * a_count), !a);
+    CHECK(a_copy = (T *)malloc(sizeof(T) * a_count), !a_copy);
     CHECK(ipiv = (int64_t *)malloc(sizeof(int64_t) * ipiv_count), !ipiv);
     CHECK(info = (int64_t *)malloc(sizeof(int64_t) * info_count), !info);
 
@@ -486,3 +493,14 @@ int main(int argc, char *argv[]) {
   }
   return exit_status;
 }
+
+int main(int argc, char *argv[]) {
+  int Passed = 0;
+
+  Passed += check<float>(argc, argv);
+#ifdef ENABLE_FP64
+  Passed += check<double>(argc, argv);
+#endif
+
+  return Passed;
+}
diff --git a/SYCL/ESIMD/regression/dgetrf_8x8.cpp b/SYCL/ESIMD/regression/dgetrf_8x8.cpp
index 521be63073..4604e78c2e 100644
--- a/SYCL/ESIMD/regression/dgetrf_8x8.cpp
+++ b/SYCL/ESIMD/regression/dgetrf_8x8.cpp
@@ -23,7 +23,6 @@
 #define ABS(x) ((x) >= 0 ? (x) : -(x))
 #define MIN(x, y) ((x) <= (y) ? (x) : (y))
 #define MAX(x, y) ((x) >= (y) ? (x) : (y))
-#define FP_RAND ((double)rand() / (double)RAND_MAX)
 
 #define OUTN(text, ...) fprintf(stderr, text, ##__VA_ARGS__)
 #define OUT(text, ...) OUTN(text "\n", ##__VA_ARGS__)
@@ -49,15 +48,16 @@ using namespace sycl;
 using namespace std;
 using namespace sycl::ext::intel::esimd;
 
-ESIMD_PRIVATE ESIMD_REGISTER(384) simd<double, 3 * 32 * 4> GRF;
+template <typename T> ESIMD_PRIVATE ESIMD_REGISTER(384) simd<T, 3 * 32 * 4> GRF;
 
 #define V(x, w, i) (x).template select<w, 1>(i)
 #define V1(x, i) V(x, 1, i)
 #define V8(x, i) V(x, 8, i)
 #define BCAST8(x, i) (x).template replicate_w<8, 1>(i)
 
-template <int M, int N, int K> ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
-  auto a = V(GRF, M * N, 0);
+template <typename T, int M, int N, int K>
+ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
+  auto a = V(GRF<T>, M * N, 0);
   for (int kk = 0; kk < N; kk += 8) {
     simd_mask<8> mask = 1;
     for (int k = 0; k < 8 && kk + k < N; k++) {
@@ -67,7 +67,7 @@ template <int M, int N, int K> ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
       V1(mask, k) = 0;
       if (ak0[k] != 0.0) {
         // scal
-        double temp = 1.0 / ak0[k];
+        T temp = 1.0 / ak0[k];
         ak0.merge(ak0 * temp, mask);
         for (int i = 8 + K + kk; i < M; i += 8) {
           V8(ak, i) *= temp;
@@ -95,37 +95,39 @@ template <int M, int N, int K> ESIMD_INLINE void dgetrfnp_panel(int64_t *info) {
 // into GRF K - an update rank P0=A[0:M,0:K] = column(F=A[0:K,0:K],
 // L=A[K:M,0:K]) - panel to update with P1=A[0:M,K:K+N] = column(U=A[0:K,K:K+N],
 // T=A[K:M,K:K+N]) - panel to be updated
-template <int M, int N, int K>
-ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) {
-  auto p1 = V(GRF, M * N, 0);
-  double *a1;
+template <typename T, int M, int N, int K>
+ESIMD_INLINE void dgetrfnp_left_step(T *a, int64_t lda, int64_t *info) {
+  auto p1 = V(GRF<T>, M * N, 0);
+  T *a1;
   int i, j, k;
 
   // load P1
   for (j = 0, a1 = a + K * lda; j < N; j++, a1 += lda)
     for (i = 0; i < M; i += 8) {
-      simd<double, 8> data;
+      simd<T, 8> data;
       data.copy_from(a1 + i);
       V8(p1, j * M + i) = data;
     }
   // (getrf) factorize T=P*L*U
-  dgetrfnp_panel<M, N, K>(info);
+  dgetrfnp_panel<T, M, N, K>(info);
 
   // store P1
   for (j = 0, a1 = a + K * lda; j < N; j++, a1 += lda)
     for (i = 0; i < M; i += 8) {
-      simd<double, 8> vals = V8(p1, j * M + i);
+      simd<T, 8> vals = V8(p1, j * M + i);
       vals.copy_to(a1 + i);
     }
 }
 
-ESIMD_INLINE void dgetrfnp_esimd_8x8(double *a, int64_t lda, int64_t *ipiv,
+template <typename T>
+ESIMD_INLINE void dgetrfnp_esimd_8x8(T *a, int64_t lda, int64_t *ipiv,
                                      int64_t *info) {
   *info = 0;
-  dgetrfnp_left_step<8, 8, 0>(a, lda, info);
+  dgetrfnp_left_step<T, 8, 8, 0>(a, lda, info);
 }
 
-void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
+template <typename T>
+void dgetrfnp_batch_strided_c(int64_t m, int64_t n, T *a, int64_t lda,
                               int64_t stride_a, int64_t *ipiv,
                               int64_t stride_ipiv, int64_t batch,
                               int64_t *info) {
@@ -136,11 +138,11 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
 
   CHECK(status = device.is_gpu(), !status);
 
-  double *a_gpu;
+  T *a_gpu;
   int64_t *ipiv_gpu;
   int64_t *info_gpu;
-  CHECK(a_gpu = static_cast<double *>(
-            malloc_shared(stride_a * batch * sizeof(double), device, context)),
+  CHECK(a_gpu = static_cast<T *>(
+            malloc_shared(stride_a * batch * sizeof(T), device, context)),
         !a_gpu);
   CHECK(ipiv_gpu = static_cast<int64_t *>(malloc_shared(
             stride_ipiv * batch * sizeof(int64_t), device, context)),
@@ -149,7 +151,7 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
             malloc_shared(batch * sizeof(int64_t), device, context)),
         !info_gpu);
 
-  memcpy(a_gpu, a, stride_a * batch * sizeof(double));
+  memcpy(a_gpu, a, stride_a * batch * sizeof(T));
 
   sycl::nd_range<1> range(sycl::range<1>{static_cast<size_t>(batch)},
                           sycl::range<1>{1});
@@ -171,7 +173,7 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
     return;
   }
 
-  memcpy(a, a_gpu, stride_a * batch * sizeof(double));
+  memcpy(a, a_gpu, stride_a * batch * sizeof(T));
   memcpy(ipiv, ipiv_gpu, stride_ipiv * batch * sizeof(int64_t));
   memcpy(info, info_gpu, batch * sizeof(int64_t));
 
@@ -180,14 +182,16 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
   free(info_gpu, context);
 }
 
-static void fp_init(int64_t m, int64_t n, double *a, int64_t lda) {
+template <typename T>
+static void fp_init(int64_t m, int64_t n, T *a, int64_t lda) {
   int64_t i, j;
   for (j = 0; j < n; j++)
     for (i = 0; i < m; i++)
-      a[i + j * lda] = 2.0 * FP_RAND - 1.0;
+      a[i + j * lda] = 2.0 * ((T)rand() / (T)RAND_MAX) - 1.0;
 }
 
-static void fp_copy(int64_t m, int64_t n, double *a, int64_t lda, double *b,
+template <typename T>
+static void fp_copy(int64_t m, int64_t n, T *a, int64_t lda, T *b,
                     int64_t ldb) {
   int64_t i, j;
   for (j = 0; j < n; j++)
@@ -195,8 +199,9 @@ static void fp_copy(int64_t m, int64_t n, double *a, int64_t lda, double *b,
       b[i + j * ldb] = a[i + j * lda];
 }
 
-static double fp_norm1(int64_t m, int64_t n, double *a, int64_t lda) {
-  double sum, value = 0.0;
+template <typename T>
+static T fp_norm1(int64_t m, int64_t n, T *a, int64_t lda) {
+  T sum, value = 0.0;
   int64_t i, j;
   for (j = 0; j < n; j++) {
     sum = 0.0;
@@ -208,28 +213,35 @@ static double fp_norm1(int64_t m, int64_t n, double *a, int64_t lda) {
   return value;
 }
 
-static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in,
-                                        double *a, int64_t lda,
-                                        int64_t stride_a, int64_t *ipiv,
-                                        int64_t stride_ipiv, int64_t batch,
-                                        int64_t *info) {
-  double thresh = 30.0;
+template <typename T>
+static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, T *a_in, T *a,
+                                        int64_t lda, int64_t stride_a,
+                                        int64_t *ipiv, int64_t stride_ipiv,
+                                        int64_t batch, int64_t *info) {
+  T thresh = 30.0;
   int fail = 0;
   int64_t i, j, k, l;
   char label[1024];
-  unsigned char prec_b[] = {0, 0, 0, 0, 0, 0, 0xb0, 0x3c};
-  double res = 0.0, nrm = 0.0, ulp = *(double *)prec_b;
-  double *w = (double *)malloc(sizeof(double) * MAX(m * n, 1));
+  unsigned char prec_b1[] = {0, 0, 0xb0, 0x3c};
+  unsigned char prec_b2[] = {0, 0, 0, 0, 0, 0, 0xb0, 0x3c};
+  T res = 0.0, nrm = 0.0, ulp;
+  if (std::is_same<T, double>::value) {
+    ulp = *(T *)prec_b2;
+  } else {
+    ulp = *(T *)prec_b1;
+  };
+
+  T *w = (T *)malloc(sizeof(T) * MAX(m * n, 1));
 
   sprintf(label, "m=%ld, n=%ld, lda=%ld, batch=%ld", m, n, lda, batch);
 
   for (k = 0; k < batch; k++) {
     /* info == 0 */
-    CHECK_AND_REPORT("info == 0", label, info[k] != 0, (double)info[k], fail);
+    CHECK_AND_REPORT("info == 0", label, info[k] != 0, (T)info[k], fail);
 
     if (m > 0 && n > 0) {
       /* | L U - A | / ( |A| n ulp ) */
-      memset(w, 0, sizeof(double) * m * n);
+      memset(w, 0, sizeof(T) * m * n);
       if (m < n) {
         for (j = 0; j < n; j++)
           for (i = 0; i <= j; i++)
@@ -256,7 +268,7 @@ static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in,
           w[i + j * m] -= a_in[k * stride_a + i + j * lda];
       res = fp_norm1(m, n, w, m);
       nrm = fp_norm1(m, n, &a_in[k * stride_a], lda);
-      nrm *= (double)n * ulp;
+      nrm *= (T)n * ulp;
       res /= nrm > 0.0 ? nrm : ulp;
       CHECK_AND_REPORT("| L U - A | / ( |A| n ulp )", label,
                        FAILED(res, thresh), res, fail);
@@ -267,12 +279,7 @@ static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in,
   return fail;
 }
 
-void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda,
-                              int64_t stride_a, int64_t *ipiv,
-                              int64_t stride_ipiv, int64_t batch,
-                              int64_t *info);
-
-int main(int argc, char *argv[]) {
+template <typename T> int check(int argc, char *argv[]) {
   int exit_status = 0;
   constexpr int64_t m = 8, n = 8, lda = 8;
   int64_t stride_a = lda * n, stride_ipiv = n;
@@ -285,10 +292,10 @@ int main(int argc, char *argv[]) {
     int64_t a_count = MAX(stride_a * batch, 1);
     int64_t ipiv_count = MAX(stride_ipiv * batch, 1);
     int64_t info_count = MAX(batch, 1);
-    double *a = NULL, *a_copy = NULL;
+    T *a = NULL, *a_copy = NULL;
     int64_t *ipiv = NULL, *info = NULL;
-    CHECK(a = (double *)malloc(sizeof(double) * a_count), !a);
-    CHECK(a_copy = (double *)malloc(sizeof(double) * a_count), !a_copy);
+    CHECK(a = (T *)malloc(sizeof(T) * a_count), !a);
+    CHECK(a_copy = (T *)malloc(sizeof(T) * a_count), !a_copy);
     CHECK(ipiv = (int64_t *)malloc(sizeof(int64_t) * ipiv_count), !ipiv);
     CHECK(info = (int64_t *)malloc(sizeof(int64_t) * info_count), !info);
 
@@ -313,3 +320,14 @@ int main(int argc, char *argv[]) {
   }
   return exit_status;
 }
+
+int main(int argc, char *argv[]) {
+  int Passed = 0;
+
+  Passed += check<float>(argc, argv);
+#ifdef ENABLE_FP64
+  Passed += check<double>(argc, argv);
+#endif
+
+  return Passed;
+}
diff --git a/SYCL/ESIMD/regression/dgetrf_8x8_aspect_fp64.cpp b/SYCL/ESIMD/regression/dgetrf_8x8_aspect_fp64.cpp
new file mode 100644
index 0000000000..6adfc7d568
--- /dev/null
+++ b/SYCL/ESIMD/regression/dgetrf_8x8_aspect_fp64.cpp
@@ -0,0 +1,18 @@
+//==------- dgetrf_8x8_aspect_fp64.cpp  - DPC++ ESIMD on-device test -------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Enable FP64 part of <dgetrf_8x8.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: gpu, aspect-fp64
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -I%S/.. -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out 1
+//
+#include "dgetrf_8x8.cpp"
diff --git a/SYCL/ESIMD/regression/dgetrf_aspect_fp64.cpp b/SYCL/ESIMD/regression/dgetrf_aspect_fp64.cpp
new file mode 100644
index 0000000000..29c31249c1
--- /dev/null
+++ b/SYCL/ESIMD/regression/dgetrf_aspect_fp64.cpp
@@ -0,0 +1,18 @@
+//==--------- dgetrf_aspect_fp64.cpp  - DPC++ ESIMD on-device test ---------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Enable FP64 part of <dgetrf.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: gpu, aspect-fp64
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -I%S/.. -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out 3 2 1
+
+#include "Inputs/dgetrf.hpp"
diff --git a/SYCL/ESIMD/regression/dgetrf_ref_aspect_fp64.cpp b/SYCL/ESIMD/regression/dgetrf_ref_aspect_fp64.cpp
new file mode 100644
index 0000000000..bb91041317
--- /dev/null
+++ b/SYCL/ESIMD/regression/dgetrf_ref_aspect_fp64.cpp
@@ -0,0 +1,19 @@
+//==------- dgetrf_ref_aspect_fp64.cpp  - DPC++ ESIMD on-device test -------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Enable FP64 part of <dgetrf_ref.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: gpu, aspect-fp64
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -fsycl -DUSE_REF -DENABLE_FP64 %s -I%S/.. -o %t.ref.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.ref.out 3 2 1
+//
+
+#include "Inputs/dgetrf.hpp"
diff --git a/SYCL/ESIMD/spec_const/spec_const_double.cpp b/SYCL/ESIMD/spec_const/spec_const_double.cpp
index 658164f702..8358a4fa82 100644
--- a/SYCL/ESIMD/spec_const/spec_const_double.cpp
+++ b/SYCL/ESIMD/spec_const/spec_const_double.cpp
@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu
+// REQUIRES: gpu, aspect-fp64
 // RUN: %clangxx -fsycl -I%S/.. %s -o %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // UNSUPPORTED: cuda || hip
diff --git a/SYCL/GroupAlgorithm/SYCL2020/sort.cpp b/SYCL/GroupAlgorithm/SYCL2020/sort.cpp
index b3e3875b2c..73416de04a 100644
--- a/SYCL/GroupAlgorithm/SYCL2020/sort.cpp
+++ b/SYCL/GroupAlgorithm/SYCL2020/sort.cpp
@@ -358,10 +358,11 @@ int main(int argc, char *argv[]) {
     test_sort_by_type<std::uint32_t>(q, sizes[i]);
     test_sort_by_type<float>(q, sizes[i]);
     test_sort_by_type<sycl::half>(q, sizes[i]);
-    test_sort_by_type<double>(q, sizes[i]);
     test_sort_by_type<std::size_t>(q, sizes[i]);
-
     test_custom_type(q, sizes[i]);
+#ifdef ENABLE_FP64
+    test_sort_by_type<double>(q, sizes[i]);
+#endif
   }
   std::cout << "Test passed." << std::endl;
 }
diff --git a/SYCL/GroupAlgorithm/SYCL2020/sort_aspect_fp64.cpp b/SYCL/GroupAlgorithm/SYCL2020/sort_aspect_fp64.cpp
new file mode 100644
index 0000000000..95137d2200
--- /dev/null
+++ b/SYCL/GroupAlgorithm/SYCL2020/sort_aspect_fp64.cpp
@@ -0,0 +1,12 @@
+// Enable FP64 part of <sort.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -I . -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+#include "sort.cpp"
diff --git a/SYCL/InlineAsm/asm_float_add.cpp b/SYCL/InlineAsm/asm_float_add.cpp
index a9a7edd16f..f8aa0d6679 100644
--- a/SYCL/InlineAsm/asm_float_add.cpp
+++ b/SYCL/InlineAsm/asm_float_add.cpp
@@ -9,9 +9,7 @@
 #include <sycl/sycl.hpp>
 #include <vector>
 
-using dataType = sycl::cl_double;
-
-template <typename T = dataType>
+template <typename T>
 struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
   KernelFunctor(const std::vector<T> &input1, const std::vector<T> &input2)
       : WithInputBuffers<T, 2>(input1, input2), WithOutputBuffer<T>(
@@ -42,26 +40,36 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
   }
 };
 
-int main() {
-  std::vector<dataType> inputA(DEFAULT_PROBLEM_SIZE),
-      inputB(DEFAULT_PROBLEM_SIZE);
+template <typename T1, typename T2> bool check() {
+  std::vector<T2> inputA(DEFAULT_PROBLEM_SIZE), inputB(DEFAULT_PROBLEM_SIZE);
   for (int i = 0; i < DEFAULT_PROBLEM_SIZE; i++) {
-    inputA[i] = (double)1 / std::pow(2, i);
-    inputB[i] = (double)2 / std::pow(2, i);
+    inputA[i] = (T1)1 / std::pow(2, i);
+    inputB[i] = (T1)2 / std::pow(2, i);
   }
 
-  KernelFunctor<> f(inputA, inputB);
+  KernelFunctor<T2> f(inputA, inputB);
   if (!launchInlineASMTest(f))
-    return 0;
+    return true;
 
   auto &C = f.getOutputBufferData();
   for (int i = 0; i < DEFAULT_PROBLEM_SIZE; i++) {
     if (C[i] != inputA[i] + inputB[i]) {
       std::cerr << "At index: " << i << ". ";
       std::cerr << C[i] << " != " << inputA[i] + inputB[i] << "\n";
-      return 1;
+      return false;
     }
   }
 
-  return 0;
+  return true;
+}
+
+int main() {
+  bool Passed = true;
+
+  Passed &= check<float, sycl::cl_float>();
+#ifdef ENABLE_FP64
+  Passed &= check<double, sycl::cl_double>();
+#endif
+
+  return Passed ? 0 : 1;
 }
diff --git a/SYCL/InlineAsm/asm_float_add_aspect_fp64.cpp b/SYCL/InlineAsm/asm_float_add_aspect_fp64.cpp
new file mode 100644
index 0000000000..b923f9a5cd
--- /dev/null
+++ b/SYCL/InlineAsm/asm_float_add_aspect_fp64.cpp
@@ -0,0 +1,11 @@
+// Enable FP64 part of <asm_float_add.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// UNSUPPORTED: cuda || hip_nvidia
+// REQUIRES: gpu,linux,aspect-fp64
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+#include "asm_float_add.cpp"
diff --git a/SYCL/InlineAsm/asm_float_imm_arg.cpp b/SYCL/InlineAsm/asm_float_imm_arg.cpp
index 13aae5c455..3946beaefa 100644
--- a/SYCL/InlineAsm/asm_float_imm_arg.cpp
+++ b/SYCL/InlineAsm/asm_float_imm_arg.cpp
@@ -9,13 +9,12 @@
 #include <sycl/sycl.hpp>
 #include <vector>
 
-constexpr double IMM_ARGUMENT = 0.5;
-using dataType = sycl::cl_double;
+template <typename T> constexpr T IMM_ARGUMENT = T(0.5);
 
-template <typename T = dataType>
-struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
-  KernelFunctor(const std::vector<T> &input)
-      : WithInputBuffers<T, 1>(input), WithOutputBuffer<T>(input.size()) {}
+template <typename T1, typename T2>
+struct KernelFunctor : WithInputBuffers<T2, 1>, WithOutputBuffer<T2> {
+  KernelFunctor(const std::vector<T2> &input)
+      : WithInputBuffers<T2, 1>(input), WithOutputBuffer<T2>(input.size()) {}
 
   void operator()(sycl::handler &cgh) {
     auto A =
@@ -25,36 +24,49 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
         this->getOutputBuffer().template get_access<sycl::access::mode::write>(
             cgh);
 
-    cgh.parallel_for<KernelFunctor<T>>(
+    cgh.parallel_for<KernelFunctor<T1, T2>>(
         sycl::range<1>{this->getOutputBufferSize()},
         [=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
 #if defined(__SYCL_DEVICE_ONLY__)
           asm("mul (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
               : "=rw"(B[wiID])
-              : "rw"(A[wiID]), "i"(IMM_ARGUMENT));
+              : "rw"(A[wiID]), "i"(IMM_ARGUMENT<T1>));
 #else
-          B[wiID] = A[wiID] * IMM_ARGUMENT;
+          B[wiID] = A[wiID] * IMM_ARGUMENT<T1>;
 #endif
         });
   }
 };
 
-int main() {
-  std::vector<dataType> input(DEFAULT_PROBLEM_SIZE);
+template <typename T1, typename T2> bool check() {
+  constexpr T1 IMM_ARGUMENT = T1(0.5);
+
+  std::vector<T2> input(DEFAULT_PROBLEM_SIZE);
   for (int i = 0; i < DEFAULT_PROBLEM_SIZE; i++)
-    input[i] = (double)1 / std::pow(2, i);
+    input[i] = (T1)1 / std::pow(2, i);
 
-  KernelFunctor<> f(input);
+  KernelFunctor<T1, T2> f(input);
   if (!launchInlineASMTest(f))
-    return 0;
+    return true;
 
   auto &B = f.getOutputBufferData();
   for (int i = 0; i < DEFAULT_PROBLEM_SIZE; ++i) {
     if (B[i] != input[i] * IMM_ARGUMENT) {
       std::cerr << "At index: " << i << ". ";
       std::cerr << B[i] << " != " << input[i] * IMM_ARGUMENT << "\n";
-      return 1;
+      return false;
     }
   }
-  return 0;
+  return true;
+}
+
+int main() {
+  bool Passed = true;
+
+  Passed &= check<float, sycl::cl_float>();
+#ifdef ENABLE_FP64
+  Passed &= check<double, sycl::cl_double>();
+#endif
+
+  return Passed ? 0 : 1;
 }
diff --git a/SYCL/InlineAsm/asm_float_imm_arg_aspect_fp64.cpp b/SYCL/InlineAsm/asm_float_imm_arg_aspect_fp64.cpp
new file mode 100644
index 0000000000..a837defb76
--- /dev/null
+++ b/SYCL/InlineAsm/asm_float_imm_arg_aspect_fp64.cpp
@@ -0,0 +1,11 @@
+// Enable FP64 part of <asm_float_imm_arg.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// UNSUPPORTED: cuda || hip_nvidia
+// REQUIRES: gpu,linux,aspect-fp64
+// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+#include "asm_float_imm_arg.cpp"
diff --git a/SYCL/KernelParams/union_kernel_param.cpp b/SYCL/KernelParams/union_kernel_param.cpp
index 0cacf310a8..c1f6ea1ca5 100644
--- a/SYCL/KernelParams/union_kernel_param.cpp
+++ b/SYCL/KernelParams/union_kernel_param.cpp
@@ -9,32 +9,43 @@
 #include <cstdio>
 #include <sycl/sycl.hpp>
 
-union TestUnion {
+template <typename T> union TestUnion {
 public:
   int myint;
   char mychar;
-  double mydouble;
+  T mytype;
 
-  TestUnion() { mydouble = 0.0; };
+  TestUnion() { mytype = 0.0; };
 };
 
-int main(int argc, char **argv) {
-  TestUnion x;
-  x.mydouble = 5.0;
-  double mydouble = 0.0;
+template <typename T> bool check() {
+  TestUnion<T> x;
+  x.mytype = 5.0;
+  T mytype = 0.0;
 
   sycl::queue queue;
   {
-    sycl::buffer<double, 1> buf(&mydouble, 1);
+    sycl::buffer<T, 1> buf(&mytype, 1);
     queue.submit([&](sycl::handler &cgh) {
-      auto acc = buf.get_access<sycl::access::mode::read_write>(cgh);
-      cgh.single_task<class test>([=]() { acc[0] = x.mydouble; });
+      auto acc = buf.template get_access<sycl::access::mode::read_write>(cgh);
+      cgh.single_task<class test>([=]() { acc[0] = x.mytype; });
     });
   }
 
-  if (mydouble != 5.0) {
-    printf("FAILED\nmydouble = %d\n", mydouble);
-    return 1;
+  if (mytype != 5.0) {
+    printf("FAILED\nmytype = %d\n", mytype);
+    return false;
   }
-  return 0;
+  return true;
+}
+
+int main(int argc, char **argv) {
+  bool Passed = true;
+
+  Passed &= check<float>();
+#ifdef ENABLE_FP64
+  Passed &= check<double>();
+#endif
+
+  return Passed ? 0 : 1;
 }
diff --git a/SYCL/KernelParams/union_kernel_param_aspect_fp64.cpp b/SYCL/KernelParams/union_kernel_param_aspect_fp64.cpp
new file mode 100644
index 0000000000..de5afb7dfa
--- /dev/null
+++ b/SYCL/KernelParams/union_kernel_param_aspect_fp64.cpp
@@ -0,0 +1,12 @@
+// Enable FP64 part of <union_kernel_param.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+#include "union_kernel_param.cpp"
diff --git a/SYCL/Regression/commandlist/Inputs/FindPrimesSYCL.cpp b/SYCL/Regression/commandlist/Inputs/FindPrimesSYCL.cpp
index 0057048134..c7ff46673d 100644
--- a/SYCL/Regression/commandlist/Inputs/FindPrimesSYCL.cpp
+++ b/SYCL/Regression/commandlist/Inputs/FindPrimesSYCL.cpp
@@ -51,7 +51,7 @@ float find_prime_s(work *w) {
         if (number < N) {
           for (size_t i = 0; i < niter; ++i) {
             bool is_prime = !(number % 2 == 0);
-            const int upper_bound = sycl::sqrt(1.0 * number) + 1;
+            const int upper_bound = sycl::sqrt(1.0f * number) + 1;
             int k = 3;
             while (k < upper_bound && is_prime) {
               is_prime = !(number % k == 0);
diff --git a/SYCL/SpecConstants/2020/handler-api.cpp b/SYCL/SpecConstants/2020/handler-api.cpp
index f9df9b82a1..2c3f8d8065 100644
--- a/SYCL/SpecConstants/2020/handler-api.cpp
+++ b/SYCL/SpecConstants/2020/handler-api.cpp
@@ -23,7 +23,9 @@
 
 constexpr sycl::specialization_id<int> int_id;
 constexpr sycl::specialization_id<int> int_id2(2);
+#ifdef ENABLE_FP64
 constexpr sycl::specialization_id<double> double_id(3.14);
+#endif
 constexpr sycl::specialization_id<custom_type> custom_type_id;
 
 class TestDefaultValuesKernel;
@@ -71,19 +73,25 @@ int main() {
 bool test_default_values(sycl::queue q) {
   sycl::buffer<int> int_buffer(1);
   sycl::buffer<int> int_buffer2(1);
+#ifdef ENABLE_FP64
   sycl::buffer<double> double_buffer(1);
+#endif
   sycl::buffer<custom_type> custom_type_buffer(1);
 
   q.submit([&](sycl::handler &cgh) {
     auto int_acc = int_buffer.get_access<sycl::access::mode::write>(cgh);
     auto int_acc2 = int_buffer2.get_access<sycl::access::mode::write>(cgh);
+#ifdef ENABLE_FP64
     auto double_acc = double_buffer.get_access<sycl::access::mode::write>(cgh);
+#endif
     auto custom_type_acc =
         custom_type_buffer.get_access<sycl::access::mode::write>(cgh);
     cgh.single_task<TestDefaultValuesKernel>([=](sycl::kernel_handler kh) {
       int_acc[0] = kh.get_specialization_constant<int_id>();
       int_acc2[0] = kh.get_specialization_constant<int_id2>();
+#ifdef ENABLE_FP64
       double_acc[0] = kh.get_specialization_constant<double_id>();
+#endif
       custom_type_acc[0] = kh.get_specialization_constant<custom_type_id>();
     });
   });
@@ -98,9 +106,11 @@ bool test_default_values(sycl::queue q) {
   if (!check_value(2, int_acc2[0], "integer specialization constant"))
     return false;
 
+#ifdef ENABLE_FP64
   auto double_acc = double_buffer.get_access<sycl::access::mode::read>();
   if (!check_value(3.14, double_acc[0], "double specialization constant"))
     return false;
+#endif
 
   auto custom_type_acc =
       custom_type_buffer.get_access<sycl::access::mode::read>();
@@ -120,9 +130,11 @@ bool test_set_and_get_on_host(sycl::queue q) {
             "integer specializaiton constant before setting any value"))
       ++errors;
 
+#ifdef ENABLE_FP64
     if (!check_value(3.14, cgh.get_specialization_constant<double_id>(),
                      "double specializaiton constant before setting any value"))
       ++errors;
+#endif
 
     custom_type custom_type_ref;
     if (!check_value(
@@ -131,10 +143,15 @@ bool test_set_and_get_on_host(sycl::queue q) {
       ++errors;
 
     int new_int_value = 8;
+#ifdef ENABLE_FP64
     double new_double_value = 3.0;
+#endif
     custom_type new_custom_type_value('b', 1.0, 12);
+
     cgh.set_specialization_constant<int_id>(new_int_value);
+#ifdef ENABLE_FP64
     cgh.set_specialization_constant<double_id>(new_double_value);
+#endif
     cgh.set_specialization_constant<custom_type_id>(new_custom_type_value);
 
     if (!check_value(
@@ -142,10 +159,12 @@ bool test_set_and_get_on_host(sycl::queue q) {
             "integer specializaiton constant after setting a new value"))
       ++errors;
 
+#ifdef ENABLE_FP64
     if (!check_value(
             new_double_value, cgh.get_specialization_constant<double_id>(),
             "double specializaiton constant after setting a new value"))
       ++errors;
+#endif
 
     if (!check_value(
             new_custom_type_value,
@@ -162,30 +181,40 @@ bool test_set_and_get_on_host(sycl::queue q) {
 bool test_set_and_get_on_device(sycl::queue q) {
   sycl::buffer<int> int_buffer(1);
   sycl::buffer<int> int_buffer2(1);
+#ifdef ENABLE_FP64
   sycl::buffer<double> double_buffer(1);
+#endif
   sycl::buffer<custom_type> custom_type_buffer(1);
 
   int new_int_value = 8;
   int new_int_value2 = 0;
+#ifdef ENABLE_FP64
   double new_double_value = 3.0;
+#endif
   custom_type new_custom_type_value('b', 1.0, 12);
 
   q.submit([&](sycl::handler &cgh) {
     auto int_acc = int_buffer.get_access<sycl::access::mode::write>(cgh);
     auto int_acc2 = int_buffer2.get_access<sycl::access::mode::write>(cgh);
+#ifdef ENABLE_FP64
     auto double_acc = double_buffer.get_access<sycl::access::mode::write>(cgh);
+#endif
     auto custom_type_acc =
         custom_type_buffer.get_access<sycl::access::mode::write>(cgh);
 
     cgh.set_specialization_constant<int_id>(new_int_value);
-    cgh.set_specialization_constant<int_id2>(new_int_value2);
+#ifdef ENABLE_FP64
     cgh.set_specialization_constant<double_id>(new_double_value);
+#endif
+    cgh.set_specialization_constant<int_id2>(new_int_value2);
     cgh.set_specialization_constant<custom_type_id>(new_custom_type_value);
 
     cgh.single_task<TestSetAndGetOnDevice>([=](sycl::kernel_handler kh) {
       int_acc[0] = kh.get_specialization_constant<int_id>();
       int_acc2[0] = kh.get_specialization_constant<int_id2>();
+#ifdef ENABLE_FP64
       double_acc[0] = kh.get_specialization_constant<double_id>();
+#endif
       custom_type_acc[0] = kh.get_specialization_constant<custom_type_id>();
     });
   });
@@ -200,10 +229,12 @@ bool test_set_and_get_on_device(sycl::queue q) {
                    "integer specialization constant"))
     return false;
 
+#ifdef ENABLE_FP64
   auto double_acc = double_buffer.get_access<sycl::access::mode::read>();
   if (!check_value(new_double_value, double_acc[0],
                    "double specialization constant"))
     return false;
+#endif
 
   auto custom_type_acc =
       custom_type_buffer.get_access<sycl::access::mode::read>();
diff --git a/SYCL/SpecConstants/2020/handler-api_aspect_fp64.cpp b/SYCL/SpecConstants/2020/handler-api_aspect_fp64.cpp
new file mode 100644
index 0000000000..3c4b3edc8a
--- /dev/null
+++ b/SYCL/SpecConstants/2020/handler-api_aspect_fp64.cpp
@@ -0,0 +1,15 @@
+// Enable FP64 part of <handler-api.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out \
+// RUN:          -fsycl-dead-args-optimization
+// FIXME: SYCL 2020 specialization constants are not supported on host device
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// FIXME: ACC devices use emulation path, which is not yet supported
+// UNSUPPORTED: hip
+
+#include "handler-api.cpp"
diff --git a/SYCL/SpecConstants/2020/kernel-bundle-api.cpp b/SYCL/SpecConstants/2020/kernel-bundle-api.cpp
index 900da6fe09..09c4a24a74 100644
--- a/SYCL/SpecConstants/2020/kernel-bundle-api.cpp
+++ b/SYCL/SpecConstants/2020/kernel-bundle-api.cpp
@@ -21,8 +21,10 @@
 #include "common.hpp"
 
 constexpr sycl::specialization_id<int> int_id;
-constexpr sycl::specialization_id<double> double_id(3.14);
 constexpr sycl::specialization_id<custom_type> custom_type_id;
+#ifdef ENABLE_FP64
+constexpr sycl::specialization_id<double> double_id(3.14);
+#endif
 
 class TestDefaultValuesKernel;
 class EmptyKernel;
@@ -76,9 +78,10 @@ bool test_default_values(sycl::queue q) {
   }
 
   sycl::buffer<int> int_buffer(1);
-  sycl::buffer<double> double_buffer(1);
   sycl::buffer<custom_type> custom_type_buffer(1);
-
+#ifdef ENABLE_FP64
+  sycl::buffer<double> double_buffer(1);
+#endif
   auto input_bundle =
       sycl::get_kernel_bundle<sycl::bundle_state::input>(q.get_context());
   auto exec_bundle = sycl::build(input_bundle);
@@ -86,33 +89,36 @@ bool test_default_values(sycl::queue q) {
   q.submit([&](sycl::handler &cgh) {
     cgh.use_kernel_bundle(exec_bundle);
     auto int_acc = int_buffer.get_access<sycl::access::mode::write>(cgh);
-    auto double_acc = double_buffer.get_access<sycl::access::mode::write>(cgh);
     auto custom_type_acc =
         custom_type_buffer.get_access<sycl::access::mode::write>(cgh);
+#ifdef ENABLE_FP64
+    auto double_acc = double_buffer.get_access<sycl::access::mode::write>(cgh);
+#endif
     cgh.single_task<TestDefaultValuesKernel>([=](sycl::kernel_handler kh) {
       int_acc[0] = kh.get_specialization_constant<int_id>();
-      double_acc[0] = kh.get_specialization_constant<double_id>();
       custom_type_acc[0] = kh.get_specialization_constant<custom_type_id>();
+#ifdef ENABLE_FP64
+      double_acc[0] = kh.get_specialization_constant<double_id>();
+#endif
     });
   });
-
   auto int_acc = int_buffer.get_access<sycl::access::mode::read>();
   if (!check_value(
           0, int_acc[0],
           "integer specialization constant (defined without default value)"))
     return false;
 
-  auto double_acc = double_buffer.get_access<sycl::access::mode::read>();
-  if (!check_value(3.14, double_acc[0], "double specialization constant"))
-    return false;
-
   auto custom_type_acc =
       custom_type_buffer.get_access<sycl::access::mode::read>();
   const custom_type custom_type_ref;
   if (!check_value(custom_type_ref, custom_type_acc[0],
                    "custom_type specialization constant"))
     return false;
-
+#ifdef ENABLE_FP64
+  auto double_acc = double_buffer.get_access<sycl::access::mode::read>();
+  if (!check_value(3.14, double_acc[0], "double specialization constant"))
+    return false;
+#endif
   return true;
 }
 
@@ -138,34 +144,37 @@ bool test_set_and_get_on_host(sycl::queue q) {
           << std::endl;
       return false;
     }
-
     // Check default values
     if (!check_value(
             0, input_bundle.get_specialization_constant<int_id>(),
             "integer specializaiton constant before setting any value"))
       ++errors;
 
-    if (!check_value(3.14,
-                     input_bundle.get_specialization_constant<double_id>(),
-                     "double specializaiton constant before setting any value"))
-      ++errors;
-
     custom_type custom_type_ref;
     if (!check_value(
             custom_type_ref,
             input_bundle.get_specialization_constant<custom_type_id>(),
             "custom_type specializaiton constant before setting any value"))
       ++errors;
-
+#ifdef ENABLE_FP64
+    if (!check_value(3.14,
+                     input_bundle.get_specialization_constant<double_id>(),
+                     "double specializaiton constant before setting any value"))
+      ++errors;
+#endif
     // Update values
     int new_int_value = 42;
-    double new_double_value = 3.0;
     custom_type new_custom_type_value('b', 1.0, 12);
+#ifdef ENABLE_FP64
+    double new_double_value = 3.0;
+#endif
 
     input_bundle.set_specialization_constant<int_id>(new_int_value);
-    input_bundle.set_specialization_constant<double_id>(new_double_value);
     input_bundle.set_specialization_constant<custom_type_id>(
         new_custom_type_value);
+#ifdef ENABLE_FP64
+    input_bundle.set_specialization_constant<double_id>(new_double_value);
+#endif
 
     // And re-check them again
     if (!check_value(
@@ -173,16 +182,17 @@ bool test_set_and_get_on_host(sycl::queue q) {
             "integer specializaiton constant after setting a new value"))
       ++errors;
 
-    if (!check_value(new_double_value,
-                     input_bundle.get_specialization_constant<double_id>(),
-                     "double specializaiton constant after setting a value"))
-      ++errors;
-
     if (!check_value(
             new_custom_type_value,
             input_bundle.get_specialization_constant<custom_type_id>(),
             "custom_type specializaiton constant after setting a new value"))
       ++errors;
+#ifdef ENABLE_FP64
+    if (!check_value(new_double_value,
+                     input_bundle.get_specialization_constant<double_id>(),
+                     "double specializaiton constant after setting a value"))
+      ++errors;
+#endif
 
     // Let's try to build the bundle
     auto exec_bundle = sycl::build(input_bundle);
@@ -193,15 +203,16 @@ bool test_set_and_get_on_host(sycl::queue q) {
                      "integer specializaiton constant after build"))
       ++errors;
 
-    if (!check_value(new_double_value,
-                     exec_bundle.get_specialization_constant<double_id>(),
-                     "double specializaiton constant after build"))
-      ++errors;
-
     if (!check_value(new_custom_type_value,
                      exec_bundle.get_specialization_constant<custom_type_id>(),
                      "custom_type specializaiton constant after build"))
       ++errors;
+#ifdef ENABLE_FP64
+    if (!check_value(new_double_value,
+                     exec_bundle.get_specialization_constant<double_id>(),
+                     "double specializaiton constant after build"))
+      ++errors;
+#endif
   } catch (sycl::exception &e) {
   }
 
@@ -210,50 +221,57 @@ bool test_set_and_get_on_host(sycl::queue q) {
 
 bool test_set_and_get_on_device(sycl::queue q) {
   sycl::buffer<int> int_buffer(1);
-  sycl::buffer<double> double_buffer(1);
   sycl::buffer<custom_type> custom_type_buffer(1);
+#ifdef ENABLE_FP64
+  sycl::buffer<double> double_buffer(1);
+#endif
 
   int new_int_value = 42;
-  double new_double_value = 3.0;
   custom_type new_custom_type_value('b', 1.0, 12);
+#ifdef ENABLE_FP64
+  double new_double_value = 3.0;
+#endif
 
   auto input_bundle =
       sycl::get_kernel_bundle<sycl::bundle_state::input>(q.get_context());
   input_bundle.set_specialization_constant<int_id>(new_int_value);
-  input_bundle.set_specialization_constant<double_id>(new_double_value);
   input_bundle.set_specialization_constant<custom_type_id>(
       new_custom_type_value);
+#ifdef ENABLE_FP64
+  input_bundle.set_specialization_constant<double_id>(new_double_value);
+#endif
   auto exec_bundle = sycl::build(input_bundle);
-
   q.submit([&](sycl::handler &cgh) {
     cgh.use_kernel_bundle(exec_bundle);
     auto int_acc = int_buffer.get_access<sycl::access::mode::write>(cgh);
-    auto double_acc = double_buffer.get_access<sycl::access::mode::write>(cgh);
     auto custom_type_acc =
         custom_type_buffer.get_access<sycl::access::mode::write>(cgh);
-
+#ifdef ENABLE_FP64
+    auto double_acc = double_buffer.get_access<sycl::access::mode::write>(cgh);
+#endif
     cgh.single_task<TestSetAndGetOnDevice>([=](sycl::kernel_handler kh) {
       int_acc[0] = kh.get_specialization_constant<int_id>();
-      double_acc[0] = kh.get_specialization_constant<double_id>();
       custom_type_acc[0] = kh.get_specialization_constant<custom_type_id>();
+#ifdef ENABLE_FP64
+      double_acc[0] = kh.get_specialization_constant<double_id>();
+#endif
     });
   });
-
   auto int_acc = int_buffer.get_access<sycl::access::mode::read>();
   if (!check_value(new_int_value, int_acc[0],
                    "integer specialization constant"))
     return false;
 
-  auto double_acc = double_buffer.get_access<sycl::access::mode::read>();
-  if (!check_value(new_double_value, double_acc[0],
-                   "double specialization constant"))
-    return false;
-
   auto custom_type_acc =
       custom_type_buffer.get_access<sycl::access::mode::read>();
   if (!check_value(new_custom_type_value, custom_type_acc[0],
                    "custom_type specialization constant"))
     return false;
-
+#ifdef ENABLE_FP64
+  auto double_acc = double_buffer.get_access<sycl::access::mode::read>();
+  if (!check_value(new_double_value, double_acc[0],
+                   "double specialization constant"))
+    return false;
+#endif
   return true;
 }
diff --git a/SYCL/SpecConstants/2020/kernel-bundle-api_aspect_fp64.cpp b/SYCL/SpecConstants/2020/kernel-bundle-api_aspect_fp64.cpp
new file mode 100644
index 0000000000..e6bc742141
--- /dev/null
+++ b/SYCL/SpecConstants/2020/kernel-bundle-api_aspect_fp64.cpp
@@ -0,0 +1,15 @@
+// Enable FP64 part of <kernel-bundle-api.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out \
+// RUN:          -fsycl-dead-args-optimization
+// FIXME: SYCL 2020 specialization constants are not supported on host device
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// FIXME: ACC devices use emulation path, which is not yet supported
+// UNSUPPORTED: hip
+
+#include "kernel-bundle-api.cpp"
diff --git a/SYCL/SubGroup/barrier.cpp b/SYCL/SubGroup/barrier.cpp
index 050424b359..3b50c0198f 100644
--- a/SYCL/SubGroup/barrier.cpp
+++ b/SYCL/SubGroup/barrier.cpp
@@ -86,10 +86,12 @@ int main() {
   check<long, true>(Queue);
   check<unsigned long, true>(Queue);
   check<float, true>(Queue);
+#ifdef ENABLE_FP64
   if (Queue.get_device().has(sycl::aspect::fp64)) {
     check<double>(Queue);
     check<double, true>(Queue);
   }
+#endif
   std::cout << "Test passed." << std::endl;
   return 0;
 }
diff --git a/SYCL/SubGroup/barrier_aspect_fp64.cpp b/SYCL/SubGroup/barrier_aspect_fp64.cpp
new file mode 100644
index 0000000000..bb720a7087
--- /dev/null
+++ b/SYCL/SubGroup/barrier_aspect_fp64.cpp
@@ -0,0 +1,19 @@
+// Enable FP64 part of <barrier.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+//==-- barrier_aspect_fp64.cpp - SYCL sub_group barrier test ---*- C++ -*---==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "barrier.cpp"
diff --git a/SYCL/SubGroup/broadcast_fp64.cpp b/SYCL/SubGroup/broadcast_fp64.cpp
index 46af65d9e1..4a4e5f4ba4 100644
--- a/SYCL/SubGroup/broadcast_fp64.cpp
+++ b/SYCL/SubGroup/broadcast_fp64.cpp
@@ -1,3 +1,4 @@
+// REQUIRES: aspect-fp64
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
diff --git a/SYCL/SubGroup/generic-shuffle.cpp b/SYCL/SubGroup/generic-shuffle.cpp
index e979ddfa85..2ad91b1e49 100644
--- a/SYCL/SubGroup/generic-shuffle.cpp
+++ b/SYCL/SubGroup/generic-shuffle.cpp
@@ -224,13 +224,13 @@ int main() {
   };
   check_struct<class KernelName_zHfIPOLOFsXiZiCvG, std::complex<float>>(
       Queue, ComplexFloatGenerator);
-
+#ifdef ENABLE_FP64
   auto ComplexDoubleGenerator = [state = std::complex<double>(0, 1)]() mutable {
     return state += std::complex<double>(2, 2);
   };
   check_struct<class KernelName_CjlHUmnuxWtyejZFD, std::complex<double>>(
       Queue, ComplexDoubleGenerator);
-
+#endif
   std::cout << "Test passed." << std::endl;
   return 0;
 }
diff --git a/SYCL/SubGroup/generic-shuffle_aspect_fp64.cpp b/SYCL/SubGroup/generic-shuffle_aspect_fp64.cpp
new file mode 100644
index 0000000000..fef972a51c
--- /dev/null
+++ b/SYCL/SubGroup/generic-shuffle_aspect_fp64.cpp
@@ -0,0 +1,20 @@
+// Enable FP64 part of <generic-shuffle.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+//==-------------- generic_shuffle_aspect-fp64.cpp -*- C++ -*---------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "generic-shuffle.cpp"
diff --git a/SYCL/SubGroup/info.cpp b/SYCL/SubGroup/info.cpp
index 74e6349849..e44afdd5ad 100644
--- a/SYCL/SubGroup/info.cpp
+++ b/SYCL/SubGroup/info.cpp
@@ -40,7 +40,11 @@ int main() {
     auto Kernel = KB.get_kernel(KernelID);
     range<2> GlobalRange{50, 40};
 
+#ifdef ENABLE_FP64
     buffer<double, 2> ABuf{GlobalRange}, BBuf{GlobalRange}, CBuf{GlobalRange};
+#else
+    buffer<float, 2> ABuf{GlobalRange}, BBuf{GlobalRange}, CBuf{GlobalRange};
+#endif
 
     Queue.submit([&](sycl::handler &cgh) {
       auto A = ABuf.get_access<access::mode::read_write>(cgh);
diff --git a/SYCL/SubGroup/info_aspect_fp64.cpp b/SYCL/SubGroup/info_aspect_fp64.cpp
new file mode 100644
index 0000000000..b7a1589798
--- /dev/null
+++ b/SYCL/SubGroup/info_aspect_fp64.cpp
@@ -0,0 +1,16 @@
+// See https://github.com/intel/llvm/issues/2922 for more info
+// UNSUPPORTED: cuda || hip
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+//==------------- info.cpp - SYCL sub_group parameters test ----*- C++ -*---==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "info.cpp"
diff --git a/SYCL/SubGroup/load_store.cpp b/SYCL/SubGroup/load_store.cpp
index 9a1ea2a9f0..7033a62813 100644
--- a/SYCL/SubGroup/load_store.cpp
+++ b/SYCL/SubGroup/load_store.cpp
@@ -262,6 +262,7 @@ int main() {
     check<aligned_ulong, 4>(Queue);
     check<aligned_ulong, 8>(Queue);
     check<aligned_ulong, 16>(Queue);
+#ifdef ENABLE_FP64
     typedef double aligned_double __attribute__((aligned(16)));
     check<aligned_double>(Queue);
     check<aligned_double, 1>(Queue);
@@ -270,6 +271,7 @@ int main() {
     check<aligned_double, 4>(Queue);
     check<aligned_double, 8>(Queue);
     check<aligned_double, 16>(Queue);
+#endif
   }
   std::cout << "Test passed." << std::endl;
   return 0;
diff --git a/SYCL/SubGroup/load_store_aspect_fp64.cpp b/SYCL/SubGroup/load_store_aspect_fp64.cpp
new file mode 100644
index 0000000000..02b184d89e
--- /dev/null
+++ b/SYCL/SubGroup/load_store_aspect_fp64.cpp
@@ -0,0 +1,24 @@
+// Enable FP64 part of <load_store.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missing __spirv_SubgroupBlockReadINTEL, __spirv_SubgroupBlockWriteINTEL on
+// AMD
+// XFAIL: hip_amd
+//
+//==----- load_store_aspect_fp64.cpp - SYCL sub_group load/store test ------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "load_store.cpp"
diff --git a/SYCL/SubGroup/shuffle_fp64.cpp b/SYCL/SubGroup/shuffle_fp64.cpp
index cc6baeb6e3..87bcaa02b9 100644
--- a/SYCL/SubGroup/shuffle_fp64.cpp
+++ b/SYCL/SubGroup/shuffle_fp64.cpp
@@ -1,3 +1,4 @@
+// REQUIRES: aspect-fp64
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
diff --git a/SYCL/USM/copy.cpp b/SYCL/USM/copy.cpp
index 2613326756..3d9786548f 100644
--- a/SYCL/USM/copy.cpp
+++ b/SYCL/USM/copy.cpp
@@ -28,12 +28,19 @@ struct test_struct {
   long long d;
   half e;
   float f;
+#ifdef ENABLE_FP64
   double g;
+#endif
 };
 
 bool operator==(const test_struct &lhs, const test_struct &rhs) {
+#ifdef ENABLE_FP64
   return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d &&
          lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g;
+#else
+  return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d &&
+         lhs.e == rhs.e && lhs.f == rhs.f;
+#endif
 }
 
 template <typename T> T *regular(queue q, alloc kind) {
@@ -88,74 +95,90 @@ int main() {
   queue q;
   auto dev = q.get_device();
 
+#ifdef ENABLE_FP64
   test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242f, 4.24242};
+#else
+  test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242f};
+#endif
 
   if (dev.has(aspect::usm_host_allocations)) {
+#ifdef ENABLE_FP64
+    runTests<double>(q, 4.24242, alloc::host, alloc::host);
+#endif
     runTests<short>(q, 4, alloc::host, alloc::host);
     runTests<int>(q, 42, alloc::host, alloc::host);
     runTests<long>(q, 424, alloc::host, alloc::host);
     runTests<long long>(q, 4242, alloc::host, alloc::host);
     runTests<half>(q, half(4.2f), alloc::host, alloc::host);
     runTests<float>(q, 4.242f, alloc::host, alloc::host);
-    runTests<double>(q, 4.24242, alloc::host, alloc::host);
     runTests<test_struct>(q, test_obj, alloc::host, alloc::host);
   }
 
   if (dev.has(aspect::usm_shared_allocations)) {
+#ifdef ENABLE_FP64
+    runTests<double>(q, 4.24242, alloc::shared, alloc::shared);
+#endif
     runTests<short>(q, 4, alloc::shared, alloc::shared);
     runTests<int>(q, 42, alloc::shared, alloc::shared);
     runTests<long>(q, 424, alloc::shared, alloc::shared);
     runTests<long long>(q, 4242, alloc::shared, alloc::shared);
     runTests<half>(q, half(4.2f), alloc::shared, alloc::shared);
     runTests<float>(q, 4.242f, alloc::shared, alloc::shared);
-    runTests<double>(q, 4.24242, alloc::shared, alloc::shared);
     runTests<test_struct>(q, test_obj, alloc::shared, alloc::shared);
   }
 
   if (dev.has(aspect::usm_device_allocations)) {
+#ifdef ENABLE_FP64
+    runTests<double>(q, 4.24242, alloc::device, alloc::device);
+#endif
     runTests<short>(q, 4, alloc::device, alloc::device);
     runTests<int>(q, 42, alloc::device, alloc::device);
     runTests<long>(q, 424, alloc::device, alloc::device);
     runTests<long long>(q, 4242, alloc::device, alloc::device);
     runTests<half>(q, half(4.2f), alloc::device, alloc::device);
     runTests<float>(q, 4.242f, alloc::device, alloc::device);
-    runTests<double>(q, 4.24242, alloc::device, alloc::device);
     runTests<test_struct>(q, test_obj, alloc::device, alloc::device);
   }
 
   if (dev.has(aspect::usm_host_allocations) &&
       dev.has(aspect::usm_shared_allocations)) {
+#ifdef ENABLE_FP64
+    runTests<double>(q, 4.24242, alloc::host, alloc::shared);
+#endif
     runTests<short>(q, 4, alloc::host, alloc::shared);
     runTests<int>(q, 42, alloc::host, alloc::shared);
     runTests<long>(q, 424, alloc::host, alloc::shared);
     runTests<long long>(q, 4242, alloc::host, alloc::shared);
     runTests<half>(q, half(4.2f), alloc::host, alloc::shared);
     runTests<float>(q, 4.242f, alloc::host, alloc::shared);
-    runTests<double>(q, 4.24242, alloc::host, alloc::shared);
     runTests<test_struct>(q, test_obj, alloc::host, alloc::shared);
   }
 
   if (dev.has(aspect::usm_host_allocations) &&
       dev.has(aspect::usm_device_allocations)) {
+#ifdef ENABLE_FP64
+    runTests<double>(q, 4.24242, alloc::host, alloc::device);
+#endif
     runTests<short>(q, 4, alloc::host, alloc::device);
     runTests<int>(q, 42, alloc::host, alloc::device);
     runTests<long>(q, 424, alloc::host, alloc::device);
     runTests<long long>(q, 4242, alloc::host, alloc::device);
     runTests<half>(q, half(4.2f), alloc::host, alloc::device);
     runTests<float>(q, 4.242f, alloc::host, alloc::device);
-    runTests<double>(q, 4.24242, alloc::host, alloc::device);
     runTests<test_struct>(q, test_obj, alloc::host, alloc::device);
   }
 
   if (dev.has(aspect::usm_shared_allocations) &&
       dev.has(aspect::usm_device_allocations)) {
+#ifdef ENABLE_FP64
+    runTests<double>(q, 4.24242, alloc::shared, alloc::device);
+#endif
     runTests<short>(q, 4, alloc::shared, alloc::device);
     runTests<int>(q, 42, alloc::shared, alloc::device);
     runTests<long>(q, 424, alloc::shared, alloc::device);
     runTests<long long>(q, 4242, alloc::shared, alloc::device);
     runTests<half>(q, half(4.2f), alloc::shared, alloc::device);
     runTests<float>(q, 4.242f, alloc::shared, alloc::device);
-    runTests<double>(q, 4.24242, alloc::shared, alloc::device);
     runTests<test_struct>(q, test_obj, alloc::shared, alloc::device);
   }
 
diff --git a/SYCL/USM/copy_aspect_fp64.cpp b/SYCL/USM/copy_aspect_fp64.cpp
new file mode 100644
index 0000000000..b02bbbf52a
--- /dev/null
+++ b/SYCL/USM/copy_aspect_fp64.cpp
@@ -0,0 +1,21 @@
+//==--------------- copy_aspect-fp64.cp - USM copy test --------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Enable FP64 part of <copy.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t1.out
+// RUN: %HOST_RUN_PLACEHOLDER %t1.out
+// RUN: %CPU_RUN_PLACEHOLDER %t1.out
+// RUN: %GPU_RUN_PLACEHOLDER %t1.out
+// RUN: %ACC_RUN_PLACEHOLDER %t1.out
+
+#include "copy.cpp";
diff --git a/SYCL/USM/fill.cpp b/SYCL/USM/fill.cpp
index c3b96abcad..2f37ad58d2 100644
--- a/SYCL/USM/fill.cpp
+++ b/SYCL/USM/fill.cpp
@@ -28,12 +28,19 @@ struct test_struct {
   long long d;
   sycl::half e;
   float f;
+#ifdef ENABLE_FP64
   double g;
+#endif
 };
 
 bool operator==(const test_struct &lhs, const test_struct &rhs) {
+#ifdef ENABLE_FP64
   return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d &&
          lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g;
+#else
+  return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d &&
+         lhs.e == rhs.e && lhs.f == rhs.f;
+#endif
 }
 
 template <typename T>
@@ -125,38 +132,48 @@ int main() {
   auto dev = q.get_device();
   auto ctxt = q.get_context();
 
+#ifdef ENABLE_FP64
   test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242, 4.24242};
+#else
+  test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242};
+#endif
 
   if (dev.get_info<info::device::usm_host_allocations>()) {
+#ifdef ENABLE_FP64
+    runHostTests<double>(dev, ctxt, q, 4.24242);
+#endif
     runHostTests<short>(dev, ctxt, q, 4);
     runHostTests<int>(dev, ctxt, q, 42);
     runHostTests<long>(dev, ctxt, q, 424);
     runHostTests<long long>(dev, ctxt, q, 4242);
     runHostTests<sycl::half>(dev, ctxt, q, sycl::half(4.2f));
     runHostTests<float>(dev, ctxt, q, 4.242f);
-    runHostTests<double>(dev, ctxt, q, 4.24242);
     runHostTests<test_struct>(dev, ctxt, q, test_obj);
   }
 
   if (dev.get_info<info::device::usm_shared_allocations>()) {
+#ifdef ENABLE_FP64
+    runSharedTests<double>(dev, ctxt, q, 4.24242);
+#endif
     runSharedTests<short>(dev, ctxt, q, 4);
     runSharedTests<int>(dev, ctxt, q, 42);
     runSharedTests<long>(dev, ctxt, q, 424);
     runSharedTests<long long>(dev, ctxt, q, 4242);
     runSharedTests<sycl::half>(dev, ctxt, q, sycl::half(4.2f));
     runSharedTests<float>(dev, ctxt, q, 4.242f);
-    runSharedTests<double>(dev, ctxt, q, 4.24242);
     runSharedTests<test_struct>(dev, ctxt, q, test_obj);
   }
 
   if (dev.get_info<info::device::usm_device_allocations>()) {
+#ifdef ENABLE_FP64
+    runDeviceTests<double>(dev, ctxt, q, 4.24242);
+#endif
     runDeviceTests<short>(dev, ctxt, q, 4);
     runDeviceTests<int>(dev, ctxt, q, 42);
     runDeviceTests<long>(dev, ctxt, q, 420);
     runDeviceTests<long long>(dev, ctxt, q, 4242);
     runDeviceTests<sycl::half>(dev, ctxt, q, sycl::half(4.2f));
     runDeviceTests<float>(dev, ctxt, q, 4.242f);
-    runDeviceTests<double>(dev, ctxt, q, 4.24242);
     runDeviceTests<test_struct>(dev, ctxt, q, test_obj);
   }
 
diff --git a/SYCL/USM/fill_aspect_fp64.cpp b/SYCL/USM/fill_aspect_fp64.cpp
new file mode 100644
index 0000000000..c542bd7c9e
--- /dev/null
+++ b/SYCL/USM/fill_aspect_fp64.cpp
@@ -0,0 +1,21 @@
+//==------- fill_aspect_fp64.cpp - USM fill test for double type -----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Enable FP64 part of <fill.cpp>. To be removed once DPC++
+// supports optional device features and the code could be enabled
+// unconditionally without causing failures in speculative compilation
+// of the kernels.
+//
+// REQUIRES: aspect-fp64
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t1.out
+// RUN: %HOST_RUN_PLACEHOLDER %t1.out
+// RUN: %CPU_RUN_PLACEHOLDER %t1.out
+// RUN: %GPU_RUN_PLACEHOLDER %t1.out
+// RUN: %ACC_RUN_PLACEHOLDER %t1.out
+
+#include "fill.cpp";