diff --git a/SYCL/AtomicRef/assignment_atomic64.cpp b/SYCL/AtomicRef/assignment_atomic64.cpp index f1ef951b0d..1ed7262749 100644 --- a/SYCL/AtomicRef/assignment_atomic64.cpp +++ b/SYCL/AtomicRef/assignment_atomic64.cpp @@ -16,7 +16,9 @@ int main() { } constexpr int N = 32; +#ifdef ENABLE_FP64 assignment_test(q, N); +#endif // Include long tests if they are 64 bits wide if constexpr (sizeof(long) == 8) { diff --git a/SYCL/AtomicRef/assignment_atomic64_aspect_fp64.cpp b/SYCL/AtomicRef/assignment_atomic64_aspect_fp64.cpp new file mode 100644 index 0000000000..0dce0ba117 --- /dev/null +++ b/SYCL/AtomicRef/assignment_atomic64_aspect_fp64.cpp @@ -0,0 +1,16 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// XFAIL: hip +// Expected failure because hip does not have atomic64 check implementation + +#include "assignment_atomic64.cpp" diff --git a/SYCL/AtomicRef/assignment_atomic64_generic.cpp b/SYCL/AtomicRef/assignment_atomic64_generic.cpp index 3bc735c2c1..8e18d356cc 100644 --- a/SYCL/AtomicRef/assignment_atomic64_generic.cpp +++ b/SYCL/AtomicRef/assignment_atomic64_generic.cpp @@ -19,8 +19,9 @@ int main() { } constexpr int N = 32; +#ifdef ENABLE_FP64 assignment_generic_test(q, N); - +#endif // Include long tests if they are 64 bits wide if constexpr (sizeof(long) == 8) { assignment_generic_test(q, N); @@ -37,6 +38,5 @@ int main() { if constexpr (sizeof(char *) == 8) { assignment_generic_test(q, N); } - std::cout << "Test passed." << std::endl; } diff --git a/SYCL/AtomicRef/assignment_atomic64_generic_aspect_fp64.cpp b/SYCL/AtomicRef/assignment_atomic64_generic_aspect_fp64.cpp new file mode 100644 index 0000000000..fb5feaace7 --- /dev/null +++ b/SYCL/AtomicRef/assignment_atomic64_generic_aspect_fp64.cpp @@ -0,0 +1,16 @@ +// Enable FP64 part of . To be removed once +// DPC++ supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// CUDA backend has had no support for the generic address space yet +// XFAIL: cuda || hip + +#include "assignment_atomic64_generic.cpp" diff --git a/SYCL/Basic/buffer/buffer.cpp b/SYCL/Basic/buffer/buffer.cpp index 5ced4c9a04..913fe015c1 100644 --- a/SYCL/Basic/buffer/buffer.cpp +++ b/SYCL/Basic/buffer/buffer.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// +#include + #include -#include #include -#include using namespace sycl; @@ -513,15 +513,22 @@ int main() { [](bool *data) { delete[] data; }); std::shared_ptr int_shrd(new int[size], [](int *data) { delete[] data; }); +#ifdef ENABLE_FP64 std::shared_ptr double_shrd(new double[size], [](double *data) { delete[] data; }); +#endif std::vector bool_vector; std::vector int_vector; +#ifdef ENABLE_FP64 std::vector double_vector; +#endif + bool_vector.reserve(size); int_vector.reserve(size); +#ifdef ENABLE_FP64 double_vector.reserve(size); +#endif sycl::queue Queue; std::mutex m; @@ -532,43 +539,58 @@ int main() { sycl::buffer buf_int_shrd( int_shrd, r, sycl::property_list{sycl::property::buffer::use_mutex(m)}); +#ifdef ENABLE_FP64 sycl::buffer buf_double_shrd( double_shrd, r, sycl::property_list{sycl::property::buffer::use_mutex(m)}); +#endif m.lock(); std::fill(bool_shrd.get(), (bool_shrd.get() + size), bool()); std::fill(int_shrd.get(), (int_shrd.get() + size), int()); +#ifdef ENABLE_FP64 std::fill(double_shrd.get(), (double_shrd.get() + size), double()); +#endif m.unlock(); buf_bool_shrd.set_final_data(bool_vector.begin()); buf_int_shrd.set_final_data(int_vector.begin()); +#ifdef ENABLE_FP64 buf_double_shrd.set_final_data(double_vector.begin()); +#endif + buf_bool_shrd.set_write_back(true); buf_int_shrd.set_write_back(true); +#ifdef ENABLE_FP64 buf_double_shrd.set_write_back(true); +#endif Queue.submit([&](sycl::handler &cgh) { auto Accessor_bool = buf_bool_shrd.get_access(cgh); auto Accessor_int = buf_int_shrd.get_access(cgh); +#ifdef ENABLE_FP64 auto Accessor_double = buf_double_shrd.get_access(cgh); +#endif cgh.parallel_for(r, [=](sycl::id<1> WIid) { Accessor_bool[WIid] = true; Accessor_int[WIid] = 3; +#ifdef ENABLE_FP64 Accessor_double[WIid] = 7.5; +#endif }); }); } // Data is copied back for (size_t i = 0; i < size; i++) { - if (bool_vector[i] != true || int_vector[i] != 3 || - double_vector[i] != 7.5) { - assert(false && "Data was not copied back"); - return 1; - } + bool Passed = true; + Passed &= (bool_vector[i] == true); + Passed &= (int_vector[i] == 3); +#ifdef ENABLE_FP64 + Passed &= (double_vector[i] == 7.5); +#endif + assert(Passed && "Data was not copied back"); } } diff --git a/SYCL/Basic/buffer/buffer_aspect_fp64.cpp b/SYCL/Basic/buffer/buffer_aspect_fp64.cpp new file mode 100644 index 0000000000..c4232caf5a --- /dev/null +++ b/SYCL/Basic/buffer/buffer_aspect_fp64.cpp @@ -0,0 +1,23 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx %cxx_std_optionc++17 -DENABLE_FP64 %s -o %t1.out %sycl_options +// RUN: %HOST_RUN_PLACEHOLDER %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t2.out +// RUN: %HOST_RUN_PLACEHOLDER %t2.out +// RUN: %CPU_RUN_PLACEHOLDER %t2.out +// RUN: %GPU_RUN_PLACEHOLDER %t2.out +// RUN: %ACC_RUN_PLACEHOLDER %t2.out + +//==------------------- buffer.cpp - SYCL buffer basic test ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "buffer.cpp" diff --git a/SYCL/DeviceLib/built-ins/nan.cpp b/SYCL/DeviceLib/built-ins/nan.cpp index caea23f560..5a07762f5f 100644 --- a/SYCL/DeviceLib/built-ins/nan.cpp +++ b/SYCL/DeviceLib/built-ins/nan.cpp @@ -41,13 +41,14 @@ template void check_nan(s::queue &Queue) { int main() { test_nan_call(); test_nan_call(); - test_nan_call(); - test_nan_call(); test_nan_call(); test_nan_call(); +#ifdef ENABLE_FP64 + test_nan_call(); + test_nan_call(); test_nan_call(); test_nan_call(); - +#endif s::queue Queue([](sycl::exception_list ExceptionList) { for (std::exception_ptr ExceptionPtr : ExceptionList) { try { @@ -63,10 +64,13 @@ int main() { if (Queue.get_device().has(sycl::aspect::fp16)) check_nan(Queue); #endif + check_nan(Queue); +#ifdef ENABLE_FP64 if (Queue.get_device().has(sycl::aspect::fp64)) { check_nan(Queue); check_nan(Queue); } +#endif return 0; } diff --git a/SYCL/DeviceLib/built-ins/nan_aspect_fp64.cpp b/SYCL/DeviceLib/built-ins/nan_aspect_fp64.cpp new file mode 100644 index 0000000000..684e568924 --- /dev/null +++ b/SYCL/DeviceLib/built-ins/nan_aspect_fp64.cpp @@ -0,0 +1,13 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include "nan.cpp" diff --git a/SYCL/ESIMD/aot_mixed.cpp b/SYCL/ESIMD/aot_mixed.cpp index 0f237a558a..366abf19d8 100644 --- a/SYCL/ESIMD/aot_mixed.cpp +++ b/SYCL/ESIMD/aot_mixed.cpp @@ -8,9 +8,9 @@ // REQUIRES: gpu // UNSUPPORTED: cuda || hip // UNSUPPORTED: esimd_emulator -// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" -o %t.sycl.out -DENABLE_SYCL=0 %s +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device gen12lp" -o %t.sycl.out -DENABLE_SYCL=0 %s // RUN: %GPU_RUN_PLACEHOLDER %t.sycl.out -// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" -o %t.out %s +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device gen12lp" -o %t.out %s // RUN: %GPU_RUN_PLACEHOLDER %t.out // This test checks the following ESIMD ahead-of-time compilation scenarios: diff --git a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp index da908391f8..c145ecc3f1 100644 --- a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp +++ b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp @@ -265,12 +265,17 @@ int main(void) { auto arith_ops = esimd_test::ArithBinaryOpsNoDiv; passed &= test(arith_ops, q); passed &= test(arith_ops, q, 0.000001f); +#ifdef ENABLE_FP64 passed &= test(arith_ops, q, 1e-15); +#endif passed &= test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 1); passed &= test(arith_ops, q, 1); - passed &= test(arith_ops, q); passed &= test(arith_ops, q); +#ifdef ENABLE_FP64 + passed &= test(arith_ops, q); + passed &= test(arith_ops, q); +#endif #ifdef USE_BF16 passed &= test(arith_ops, q); passed &= test(arith_ops, q, 0.03); @@ -326,12 +331,17 @@ int main(void) { auto cmp_ops = esimd_test::CmpOps; passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); - passed &= test(cmp_ops, q); +#ifdef ENABLE_FP64 + passed &= test(arith_ops, q, 1e-15); +#endif passed &= test(cmp_ops, q); passed &= test(cmp_ops, q, 1); passed &= test(cmp_ops, q, 1); - passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); +#ifdef ENABLE_FP64 + passed &= test(cmp_ops, q); + passed &= test(cmp_ops, q); +#endif #ifdef USE_BF16 passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); diff --git a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy_aspect_fp64.cpp b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy_aspect_fp64.cpp new file mode 100644 index 0000000000..d9d2b256bb --- /dev/null +++ b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy_aspect_fp64.cpp @@ -0,0 +1,31 @@ +//==-- bin_un_cmp_ops_heavy_aspect-fp64.cpp - DPC++ ESIMD on-device test --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64, gpu +// UNSUPPORTED: cuda || hip +// TODO: esimd_emulator fails due to unimplemented 'half' type +// XFAIL: esimd_emulator +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// Tests various binary operations applied to simd objects. + +// TODO +// Arithmetic operations behaviour depends on Gen's control regiter's rounding +// mode, which is RTNE by default: +// cr0.5:4 is 00b = Round to Nearest or Even (RTNE) +// For half this leads to divergence between Gen and host (emulated) results +// larger than certain threshold. Might need to tune the cr0 once this feature +// is available in ESIMD. +// + +#include "bin_and_cmp_ops_heavy.cpp" diff --git a/SYCL/ESIMD/api/functional/ctors/ctor_converting_fp_extra.cpp b/SYCL/ESIMD/api/functional/ctors/ctor_converting_fp_extra.cpp index 39f9be9fcd..3cd6e5c556 100644 --- a/SYCL/ESIMD/api/functional/ctors/ctor_converting_fp_extra.cpp +++ b/SYCL/ESIMD/api/functional/ctors/ctor_converting_fp_extra.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu, level_zero +// REQUIRES: gpu, level_zero, aspect-fp64 // XREQUIRES: gpu // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet. // The current "REQUIRES" should be replaced with "gpu" only as mentioned in diff --git a/SYCL/ESIMD/api/functional/ctors/ctor_load_acc_fp_extra.cpp b/SYCL/ESIMD/api/functional/ctors/ctor_load_acc_fp_extra.cpp index 797ff009c8..4cf2aa7add 100644 --- a/SYCL/ESIMD/api/functional/ctors/ctor_load_acc_fp_extra.cpp +++ b/SYCL/ESIMD/api/functional/ctors/ctor_load_acc_fp_extra.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu, level_zero +// REQUIRES: gpu, level_zero, aspect-fp64 // XREQUIRES: gpu // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet. // The current "REQUIRES" should be replaced with "gpu" only as mentioned in diff --git a/SYCL/ESIMD/api/functional/ctors/ctor_load_usm_fp_extra.cpp b/SYCL/ESIMD/api/functional/ctors/ctor_load_usm_fp_extra.cpp index 4b960879c1..4e8bb1414f 100644 --- a/SYCL/ESIMD/api/functional/ctors/ctor_load_usm_fp_extra.cpp +++ b/SYCL/ESIMD/api/functional/ctors/ctor_load_usm_fp_extra.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu, level_zero +// REQUIRES: gpu, level_zero, aspect-fp64 // XREQUIRES: gpu // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet. // The current "REQUIRES" should be replaced with "gpu" only as mentioned in diff --git a/SYCL/ESIMD/api/functional/functions/functions_select_2d.hpp b/SYCL/ESIMD/api/functional/functions/functions_select_2d.hpp index cc19d2cca7..84fac0b20b 100644 --- a/SYCL/ESIMD/api/functional/functions/functions_select_2d.hpp +++ b/SYCL/ESIMD/api/functional/functions/functions_select_2d.hpp @@ -224,8 +224,10 @@ bool run_test_for_types(sycl::queue &queue) { const auto great_size = get_dimensions(); #ifdef SIMD_RUN_TEST_WITH_SYCL_HALF_TYPE const auto all_types = get_tested_types(); -#else +#elif SIMD_RUN_TEST_WITH_SYCL_DOUBLE_TYPE const auto all_types = named_type_pack::generate("double"); +#else + const auto all_types = named_type_pack::generate("float"); #endif // Verify correctness for different select sizes. diff --git a/SYCL/ESIMD/api/functional/functions/functions_select_2d_core_aspect_fp64.cpp b/SYCL/ESIMD/api/functional/functions/functions_select_2d_core_aspect_fp64.cpp new file mode 100644 index 0000000000..73071ff8ca --- /dev/null +++ b/SYCL/ESIMD/api/functional/functions/functions_select_2d_core_aspect_fp64.cpp @@ -0,0 +1,41 @@ +//==- functions_select_2d_core_aspect_fp64.cpp - DPC++ ESIMD on-device test ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu, level_zero, aspect-fp64 +// XREQUIRES: gpu +// TODO gpu and level_zero in REQUIRES due to only this platforms supported yet. +// The current "REQUIRES" should be replaced with "gpu" only as mentioned in +// "XREQUIRES". +// UNSUPPORTED: cuda, hip +// RUN: %clangxx -fsycl -DSIMD_RUN_TEST_WITH_SYCL_DOUBLE_TYPE %s -fsycl-device-code-split=per_kernel -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// +// Test for simd select for 2d function. +// The test creates source simd instance with reference data and invokes logical +// not operator, using core data types. +// The test verifies that selected values can be changed with avoid to change +// values, that hasn't beed selected. + +#include "functions_select_2d.hpp" + +using namespace sycl::ext::intel::experimental::esimd; +using namespace esimd_test::api::functional; + +int main(int, char **) { + sycl::queue queue(esimd_test::ESIMDSelector{}, + esimd_test::createExceptionHandler()); + + if (!queue.get_device().has(sycl::aspect::fp64) { + std::cout << "Skipping test\n"; + return 0; + } + + bool passed = functions::run_test_for_types(queue); + + std::cout << (passed ? "=== Test passed\n" : "=== Test FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_accuracy_fp_extra.cpp b/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_accuracy_fp_extra.cpp index 5e38a3b7cf..bba6deaa45 100644 --- a/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_accuracy_fp_extra.cpp +++ b/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_accuracy_fp_extra.cpp @@ -6,7 +6,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu, level_zero +// REQUIRES: gpu, level_zero, aspect-fp64 // XREQUIRES: gpu // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet. // The current "REQUIRES" should be replaced with "gpu" only as mentioned in diff --git a/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_fp_extra.cpp b/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_fp_extra.cpp index 7f88d0ad96..87355f845d 100644 --- a/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_fp_extra.cpp +++ b/SYCL/ESIMD/api/functional/operators/operator_decrement_and_increment_fp_extra.cpp @@ -6,7 +6,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu, level_zero +// REQUIRES: gpu, level_zero, aspect-fp64 // XREQUIRES: gpu // TODO gpu and level_zero in REQUIRES due to only this platforms supported yet. // The current "REQUIRES" should be replaced with "gpu" only as mentioned in diff --git a/SYCL/ESIMD/api/replicate_smoke.cpp b/SYCL/ESIMD/api/replicate_smoke.cpp index c9985edc93..194c5c227a 100644 --- a/SYCL/ESIMD/api/replicate_smoke.cpp +++ b/SYCL/ESIMD/api/replicate_smoke.cpp @@ -186,7 +186,9 @@ int main(int argc, char **argv) { passed &= test(q); passed &= test(q); passed &= test(q); +#ifdef ENABLE_FP64 passed &= test(q); +#endif std::cout << (passed ? "Test passed\n" : "Test FAILED\n"); return passed ? 0 : 1; diff --git a/SYCL/ESIMD/api/replicate_smoke_aspect_fp64.cpp b/SYCL/ESIMD/api/replicate_smoke_aspect_fp64.cpp new file mode 100644 index 0000000000..ad587c4df8 --- /dev/null +++ b/SYCL/ESIMD/api/replicate_smoke_aspect_fp64.cpp @@ -0,0 +1,19 @@ +//==------- replicate_smoke.cpp - DPC++ ESIMD on-device test --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: gpu, aspect-fp64 +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// +#include "replicate_smoke.cpp" diff --git a/SYCL/ESIMD/api/saturation_smoke.cpp b/SYCL/ESIMD/api/saturation_smoke.cpp index e6aa049160..789224963f 100644 --- a/SYCL/ESIMD/api/saturation_smoke.cpp +++ b/SYCL/ESIMD/api/saturation_smoke.cpp @@ -188,7 +188,9 @@ int main(int argc, char **argv) { passed &= test(q); passed &= test(q); passed &= test(q); +#ifdef ENABLE_FP64 passed &= test(q); +#endif passed &= test(q); passed &= test(q); @@ -205,7 +207,9 @@ int main(int argc, char **argv) { passed &= test(q); passed &= test(q); +#ifdef ENABLE_FP64 passed &= test(q); +#endif std::cout << (passed ? "Test passed\n" : "Test FAILED\n"); return passed ? 0 : 1; diff --git a/SYCL/ESIMD/api/saturation_smoke_aspect_fp64.cpp b/SYCL/ESIMD/api/saturation_smoke_aspect_fp64.cpp new file mode 100644 index 0000000000..c1c0180911 --- /dev/null +++ b/SYCL/ESIMD/api/saturation_smoke_aspect_fp64.cpp @@ -0,0 +1,22 @@ +//==---- saturation_smoke_aspect_fp64.cpp - DPC++ ESIMD on-device test ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: gpu, aspect-fp64 +// UNSUPPORTED: cuda || hip +// TODO: esimd_emulator fails due to unimplemented 'half' type +// XFAIL: esimd_emulator +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// +// The test checks main functionality of esimd::saturate function. + +#include "saturation_smoke.cpp" diff --git a/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp b/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp index c397945845..7be06cdd51 100644 --- a/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp +++ b/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp @@ -23,7 +23,9 @@ int main(int argc, char **argv) { bool passed = true; passed &= test(q); passed &= test(q); +#ifdef ENABLE_FP64 passed &= test(q); +#endif std::cout << (passed ? "=== Test passed\n" : "=== Test FAILED\n"); return passed ? 0 : 1; diff --git a/SYCL/ESIMD/api/simd_view_select_2d_fp_aspect_fp64.cpp b/SYCL/ESIMD/api/simd_view_select_2d_fp_aspect_fp64.cpp new file mode 100644 index 0000000000..5b46c63cdf --- /dev/null +++ b/SYCL/ESIMD/api/simd_view_select_2d_fp_aspect_fp64.cpp @@ -0,0 +1,23 @@ +//==- simd_view_select_2d_fp_aspect_fp64.cpp - DPC++ ESIMD on-device test -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: gpu, aspect-fp64 +// UNSUPPORTED: cuda || hip +// TODO: esimd_emulator fails due to unimplemented 'single_task()' method +// XFAIL: esimd_emulator +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -fsycl-device-code-split=per_kernel -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// +// Smoke test for 2D region select API which can be used to represent 2D tiles. +// Tests FP types. + +#include "simd_view_select_2d.cpp" diff --git a/SYCL/ESIMD/api/unary_ops_heavy.cpp b/SYCL/ESIMD/api/unary_ops_heavy.cpp index bc86616e61..8797f345ad 100644 --- a/SYCL/ESIMD/api/unary_ops_heavy.cpp +++ b/SYCL/ESIMD/api/unary_ops_heavy.cpp @@ -173,16 +173,20 @@ int main(void) { passed &= test(mod_ops, q); passed &= test(mod_ops, q); passed &= test(mod_ops, q); +#ifdef ENABLE_FP64 passed &= test(mod_ops, q); +#endif - auto signed_ops = esimd_test::OpSeq{}; - passed &= test(signed_ops, q); - passed &= test(signed_ops, q); - passed &= test(signed_ops, q); - passed &= test(signed_ops, q); - passed &= test(signed_ops, q); - passed &= test(signed_ops, q); - passed &= test(signed_ops, q); + auto singed_ops = esimd_test::OpSeq{}; + passed &= test(singed_ops, q); + passed &= test(singed_ops, q); + passed &= test(singed_ops, q); + passed &= test(singed_ops, q); + passed &= test(singed_ops, q); + passed &= test(singed_ops, q); +#ifdef ENABLE_FP64 + passed &= test(singed_ops, q); +#endif #ifdef USE_BF16 // TODO: the rest unary operations are not yet supported for bfloat16 on host. diff --git a/SYCL/ESIMD/api/unary_ops_heavy_aspect_fp64.cpp b/SYCL/ESIMD/api/unary_ops_heavy_aspect_fp64.cpp new file mode 100644 index 0000000000..dbf71de392 --- /dev/null +++ b/SYCL/ESIMD/api/unary_ops_heavy_aspect_fp64.cpp @@ -0,0 +1,20 @@ +//==------ unary_ops_heavy_aspect_fp64.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: gpu, aspect-fp64 +// UNSUPPORTED: cuda || hip +// TODO: esimd_emulator fails due to unimplemented 'half' type +// XFAIL: esimd_emulator +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "unary_ops_heavy.cpp" diff --git a/SYCL/ESIMD/ext_math.cpp b/SYCL/ESIMD/ext_math.cpp index c515cbd29a..e53a534f88 100644 --- a/SYCL/ESIMD/ext_math.cpp +++ b/SYCL/ESIMD/ext_math.cpp @@ -467,17 +467,23 @@ int main(void) { Pass &= testESIMD(Q); Pass &= testESIMD(Q); Pass &= testESIMD(Q); + Pass &= testSYCL(Q); + Pass &= testSYCL(Q); + Pass &= testESIMDPow(Q); + Pass &= testESIMDPow(Q); if (Q.get_backend() != sycl::backend::ext_intel_esimd_emulator) { // ESIMD_EMULATOR supports only ESIMD kernels Pass &= testSYCL(Q); Pass &= testSYCL(Q); } Pass &= testESIMDSqrtIEEE(Q); +#ifdef ENABLE_FP64 Pass &= testESIMDSqrtIEEE(Q); +#endif Pass &= testESIMDDivIEEE(Q); +#ifdef ENABLE_FP64 Pass &= testESIMDDivIEEE(Q); - Pass &= testESIMDPow(Q); - Pass &= testESIMDPow(Q); +#endif std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n"); return Pass ? 0 : 1; } diff --git a/SYCL/ESIMD/ext_math_aspect_fp64.cpp b/SYCL/ESIMD/ext_math_aspect_fp64.cpp new file mode 100644 index 0000000000..e519e916ea --- /dev/null +++ b/SYCL/ESIMD/ext_math_aspect_fp64.cpp @@ -0,0 +1,20 @@ +//==----- ext_math_aspect_fp64.cpp - DPC++ ESIMD extended math test -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: gpu, aspect-fp64 +// UNSUPPORTED: cuda || hip +// TODO: esimd_emulator fails due to unimplemented 'half' type +// XFAIL: esimd_emulator +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "ext_math.cpp" diff --git a/SYCL/ESIMD/regression/Inputs/dgetrf.hpp b/SYCL/ESIMD/regression/Inputs/dgetrf.hpp index 6b099b38b0..2ccf5f5356 100644 --- a/SYCL/ESIMD/regression/Inputs/dgetrf.hpp +++ b/SYCL/ESIMD/regression/Inputs/dgetrf.hpp @@ -21,7 +21,6 @@ #define ABS(x) ((x) >= 0 ? (x) : -(x)) #define MIN(x, y) ((x) <= (y) ? (x) : (y)) #define MAX(x, y) ((x) >= (y) ? (x) : (y)) -#define FP_RAND ((double)rand() / (double)RAND_MAX) #define OUTN(text, ...) fprintf(stderr, text, ##__VA_ARGS__) #define OUT(text, ...) OUTN(text "\n", ##__VA_ARGS__) @@ -47,15 +46,16 @@ using namespace sycl; using namespace std; using namespace sycl::ext::intel::esimd; -ESIMD_PRIVATE ESIMD_REGISTER(192) simd GRF; +template ESIMD_PRIVATE ESIMD_REGISTER(192) simd GRF; #define V(x, w, i) (x).template select(i) #define V1(x, i) V(x, 1, i) #define V8(x, i) V(x, 8, i) #define BCAST8(x, i) (x).template replicate_w<8, 1>(i) -template ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { - auto a = V(GRF, M * N, 0); +template +ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { + auto a = V(GRF, M * N, 0); if (K % 8) { simd_mask<8> mask = 1; @@ -69,7 +69,7 @@ template ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { V1(mask, k) = 0; if (ak0[k] != 0.0) { // scal - double temp = 1.0 / ak0[k]; + T temp = 1.0 / ak0[k]; ak0.merge(ak0 * temp, mask); for (int i = 8 + K & (-8); i < M; i += 8) { V8(ak, i) *= temp; @@ -98,7 +98,7 @@ template ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { V1(mask, k) = 0; if (ak0[k] != 0.0) { // scal - double temp = 1.0 / ak0[k]; + T temp = 1.0 / ak0[k]; ak0.merge(ak0 * temp, mask); for (int i = 16 + (K & (-8)) + kk; i < M; i += 8) { V8(ak, i) *= temp; @@ -129,7 +129,7 @@ template ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { V1(mask, k) = 0; if (ak0[k] != 0.0) { // scal - double temp = 1.0 / ak0[k]; + T temp = 1.0 / ak0[k]; ak0.merge(ak0 * temp, mask); for (int i = 8 + K + kk; i < M; i += 8) { V8(ak, i) *= temp; @@ -159,16 +159,16 @@ template ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { // into GRF K - an update rank P0=A[0:M,0:K] = column(F=A[0:K,0:K], // L=A[K:M,0:K]) - panel to update with P1=A[0:M,K:K+N] = column(U=A[0:K,K:K+N], // T=A[K:M,K:K+N]) - panel to be updated -template -ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) { - auto p1 = V(GRF, M * N, 0); - double *a1; +template +ESIMD_INLINE void dgetrfnp_left_step(T *a, int64_t lda, int64_t *info) { + auto p1 = V(GRF, M * N, 0); + T *a1; int i, j, k; // load P1 for (j = 0, a1 = a + K * lda; j < N; j++, a1 += lda) for (i = 0; i < M; i += 8) { - simd data; + simd data; data.copy_from(a1 + i); V8(p1, j * M + i) = data; } @@ -178,10 +178,10 @@ ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) { // (gemm) update T=T-L*U for (int kk = 0; kk < K; kk += 8) { simd_mask<8> mask = 1; - simd a0k, aik; + simd a0k, aik; for (k = 0; k < 8 && kk + k < K; k++) { V1(mask, k) = 0; - simd data; + simd data; data.copy_from(a + kk + (kk + k) * lda); V8(a0k, 0) = data; for (j = 0; j < N; j++) { @@ -193,7 +193,7 @@ ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) { } for (k = 0; k < 8 && kk + k < K; k++) { for (i = kk + 8; i < M; i += 8) { - simd data; + simd data; data.copy_from(a + i + (kk + k) * lda); V8(aik, 0) = data; for (j = 0; j < N; j++) { @@ -207,24 +207,25 @@ ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) { } } // (getrf) factorize T=P*L*U - dgetrfnp_panel(info); + dgetrfnp_panel(info); // store P1 for (j = 0, a1 = a + K * lda; j < N; j++, a1 += lda) for (i = 0; i < M; i += 8) { - simd vals = V8(p1, j * M + i); + simd vals = V8(p1, j * M + i); vals.copy_to(a1 + i); } } #endif // !USE_REF -ESIMD_INLINE void dgetrfnp_esimd(int64_t m, int64_t n, double *a, int64_t lda, +template +ESIMD_INLINE void dgetrfnp_esimd(int64_t m, int64_t n, T *a, int64_t lda, int64_t *ipiv, int64_t *info) { *info = 0; #if defined(USE_REF) int i, j, k; for (k = 0; k < MIN(m, n); k++) { - double temp = a[k + k * lda]; + T temp = a[k + k * lda]; if (!(*info) && temp == 0.0) *info = k + 1; // scal @@ -243,62 +244,63 @@ ESIMD_INLINE void dgetrfnp_esimd(int64_t m, int64_t n, double *a, int64_t lda, #else // defined(USE_REF) if (m == 8) { if (n == 8) - dgetrfnp_left_step<8, 8, 0>(a, lda, info); + dgetrfnp_left_step(a, lda, info); } else if (m == 16) { if (n == 8) - dgetrfnp_left_step<16, 8, 0>(a, lda, info); + dgetrfnp_left_step(a, lda, info); else if (n == 16) - dgetrfnp_left_step<16, 16, 0>(a, lda, info); + dgetrfnp_left_step(a, lda, info); } else if (m == 32) { if (n == 8) - dgetrfnp_left_step<32, 8, 0>(a, lda, info); + dgetrfnp_left_step(a, lda, info); else if (n == 12) - dgetrfnp_left_step<32, 12, 0>(a, lda, info); + dgetrfnp_left_step(a, lda, info); else if (n == 16) { - dgetrfnp_left_step<32, 8, 0>(a, lda, info); - dgetrfnp_left_step<32, 8, 8>(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); } else if (n == 24) { - dgetrfnp_left_step<32, 8, 0>(a, lda, info); - dgetrfnp_left_step<32, 8, 8>(a, lda, info); - dgetrfnp_left_step<32, 8, 16>(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); } else if (n == 32) { - dgetrfnp_left_step<32, 8, 0>(a, lda, info); - dgetrfnp_left_step<32, 8, 8>(a, lda, info); - dgetrfnp_left_step<32, 8, 16>(a, lda, info); - dgetrfnp_left_step<32, 8, 24>(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); } } else if (m == 64) { if (n == 6) - dgetrfnp_left_step<64, 6, 0>(a, lda, info); + dgetrfnp_left_step(a, lda, info); else if (n == 16) { - dgetrfnp_left_step<64, 6, 0>(a, lda, info); - dgetrfnp_left_step<64, 6, 6>(a, lda, info); - dgetrfnp_left_step<64, 4, 12>(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); } else if (n == 32) { - dgetrfnp_left_step<64, 6, 0>(a, lda, info); - dgetrfnp_left_step<64, 6, 6>(a, lda, info); - dgetrfnp_left_step<64, 6, 12>(a, lda, info); - dgetrfnp_left_step<64, 6, 18>(a, lda, info); - dgetrfnp_left_step<64, 6, 24>(a, lda, info); - dgetrfnp_left_step<64, 2, 30>(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); } else if (n == 64) { - dgetrfnp_left_step<64, 6, 0>(a, lda, info); - dgetrfnp_left_step<64, 6, 6>(a, lda, info); - dgetrfnp_left_step<64, 6, 12>(a, lda, info); - dgetrfnp_left_step<64, 6, 18>(a, lda, info); - dgetrfnp_left_step<64, 6, 24>(a, lda, info); - dgetrfnp_left_step<64, 6, 30>(a, lda, info); - dgetrfnp_left_step<64, 6, 36>(a, lda, info); - dgetrfnp_left_step<64, 6, 42>(a, lda, info); - dgetrfnp_left_step<64, 6, 48>(a, lda, info); - dgetrfnp_left_step<64, 6, 54>(a, lda, info); - dgetrfnp_left_step<64, 4, 60>(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); + dgetrfnp_left_step(a, lda, info); } } #endif // defined(USE_REF) } -void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, +template +void dgetrfnp_batch_strided_c(int64_t m, int64_t n, T *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch, int64_t *info) { @@ -309,11 +311,11 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, CHECK(status = device.is_gpu(), !status); - double *a_gpu; + T *a_gpu; int64_t *ipiv_gpu; int64_t *info_gpu; - CHECK(a_gpu = static_cast( - malloc_shared(stride_a * batch * sizeof(double), device, context)), + CHECK(a_gpu = static_cast( + malloc_shared(stride_a * batch * sizeof(T), device, context)), !a_gpu); CHECK(ipiv_gpu = static_cast(malloc_shared( stride_ipiv * batch * sizeof(int64_t), device, context)), @@ -322,7 +324,7 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, malloc_shared(batch * sizeof(int64_t), device, context)), !info_gpu); - memcpy(a_gpu, a, stride_a * batch * sizeof(double)); + memcpy(a_gpu, a, stride_a * batch * sizeof(T)); sycl::nd_range<1> range(sycl::range<1>{static_cast(batch)}, sycl::range<1>{1}); @@ -344,7 +346,7 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, return; } - memcpy(a, a_gpu, stride_a * batch * sizeof(double)); + memcpy(a, a_gpu, stride_a * batch * sizeof(T)); memcpy(ipiv, ipiv_gpu, stride_ipiv * batch * sizeof(int64_t)); memcpy(info, info_gpu, batch * sizeof(int64_t)); @@ -353,14 +355,16 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, free(info_gpu, context); } -static void fp_init(int64_t m, int64_t n, double *a, int64_t lda) { +template +static void fp_init(int64_t m, int64_t n, T *a, int64_t lda) { int64_t i, j; for (j = 0; j < n; j++) for (i = 0; i < m; i++) - a[i + j * lda] = 2.0 * FP_RAND - 1.0; + a[i + j * lda] = 2.0 * ((T)rand() / (T)RAND_MAX) - 1.0; } -static void fp_copy(int64_t m, int64_t n, double *a, int64_t lda, double *b, +template +static void fp_copy(int64_t m, int64_t n, T *a, int64_t lda, T *b, int64_t ldb) { int64_t i, j; for (j = 0; j < n; j++) @@ -368,8 +372,9 @@ static void fp_copy(int64_t m, int64_t n, double *a, int64_t lda, double *b, b[i + j * ldb] = a[i + j * lda]; } -static double fp_norm1(int64_t m, int64_t n, double *a, int64_t lda) { - double sum, value = 0.0; +template +static T fp_norm1(int64_t m, int64_t n, T *a, int64_t lda) { + T sum, value = 0.0; int64_t i, j; for (j = 0; j < n; j++) { sum = 0.0; @@ -381,28 +386,35 @@ static double fp_norm1(int64_t m, int64_t n, double *a, int64_t lda) { return value; } -static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in, - double *a, int64_t lda, - int64_t stride_a, int64_t *ipiv, - int64_t stride_ipiv, int64_t batch, - int64_t *info) { - double thresh = 30.0; +template +static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, T *a_in, T *a, + int64_t lda, int64_t stride_a, + int64_t *ipiv, int64_t stride_ipiv, + int64_t batch, int64_t *info) { + T thresh = 30.0; int fail = 0; int64_t i, j, k, l; char label[1024]; - unsigned char prec_b[] = {0, 0, 0, 0, 0, 0, 0xb0, 0x3c}; - double res = 0.0, nrm = 0.0, ulp = *(double *)prec_b; - double *w = (double *)malloc(sizeof(double) * MAX(m * n, 1)); + unsigned char prec_b1[] = {0, 0, 0xb0, 0x3c}; + unsigned char prec_b2[] = {0, 0, 0, 0, 0, 0, 0xb0, 0x3c}; + T res = 0.0, nrm = 0.0, ulp; + if (std::is_same::value) { + ulp = *(T *)prec_b2; + } else { + ulp = *(T *)prec_b1; + } + + T *w = (T *)malloc(sizeof(T) * MAX(m * n, 1)); sprintf(label, "m=%ld, n=%ld, lda=%ld, batch=%ld", m, n, lda, batch); for (k = 0; k < batch; k++) { /* info == 0 */ - CHECK_AND_REPORT("info == 0", label, info[k] != 0, (double)info[k], fail); + CHECK_AND_REPORT("info == 0", label, info[k] != 0, (T)info[k], fail); if (m > 0 && n > 0) { /* | L U - A | / ( |A| n ulp ) */ - memset(w, 0, sizeof(double) * m * n); + memset(w, 0, sizeof(T) * m * n); if (m < n) { for (j = 0; j < n; j++) for (i = 0; i <= j; i++) @@ -429,7 +441,7 @@ static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in, w[i + j * m] -= a_in[k * stride_a + i + j * lda]; res = fp_norm1(m, n, w, m); nrm = fp_norm1(m, n, &a_in[k * stride_a], lda); - nrm *= (double)n * ulp; + nrm *= (T)n * ulp; res /= nrm > 0.0 ? nrm : ulp; CHECK_AND_REPORT("| L U - A | / ( |A| n ulp )", label, FAILED(res, thresh), res, fail); @@ -440,12 +452,7 @@ static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in, return fail; } -void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, - int64_t stride_a, int64_t *ipiv, - int64_t stride_ipiv, int64_t batch, - int64_t *info); - -int main(int argc, char *argv[]) { +template int check(int argc, char *argv[]) { int exit_status = 0; int64_t m = 64, n = 64, lda = 64; int64_t stride_a = lda * n, stride_ipiv = n; @@ -458,10 +465,10 @@ int main(int argc, char *argv[]) { int64_t a_count = MAX(stride_a * batch, 1); int64_t ipiv_count = MAX(stride_ipiv * batch, 1); int64_t info_count = MAX(batch, 1); - double *a = NULL, *a_copy = NULL; + T *a = NULL, *a_copy = NULL; int64_t *ipiv = NULL, *info = NULL; - CHECK(a = (double *)malloc(sizeof(double) * a_count), !a); - CHECK(a_copy = (double *)malloc(sizeof(double) * a_count), !a_copy); + CHECK(a = (T *)malloc(sizeof(T) * a_count), !a); + CHECK(a_copy = (T *)malloc(sizeof(T) * a_count), !a_copy); CHECK(ipiv = (int64_t *)malloc(sizeof(int64_t) * ipiv_count), !ipiv); CHECK(info = (int64_t *)malloc(sizeof(int64_t) * info_count), !info); @@ -486,3 +493,14 @@ int main(int argc, char *argv[]) { } return exit_status; } + +int main(int argc, char *argv[]) { + int Passed = 0; + + Passed += check(argc, argv); +#ifdef ENABLE_FP64 + Passed += check(argc, argv); +#endif + + return Passed; +} diff --git a/SYCL/ESIMD/regression/dgetrf_8x8.cpp b/SYCL/ESIMD/regression/dgetrf_8x8.cpp index 521be63073..4604e78c2e 100644 --- a/SYCL/ESIMD/regression/dgetrf_8x8.cpp +++ b/SYCL/ESIMD/regression/dgetrf_8x8.cpp @@ -23,7 +23,6 @@ #define ABS(x) ((x) >= 0 ? (x) : -(x)) #define MIN(x, y) ((x) <= (y) ? (x) : (y)) #define MAX(x, y) ((x) >= (y) ? (x) : (y)) -#define FP_RAND ((double)rand() / (double)RAND_MAX) #define OUTN(text, ...) fprintf(stderr, text, ##__VA_ARGS__) #define OUT(text, ...) OUTN(text "\n", ##__VA_ARGS__) @@ -49,15 +48,16 @@ using namespace sycl; using namespace std; using namespace sycl::ext::intel::esimd; -ESIMD_PRIVATE ESIMD_REGISTER(384) simd GRF; +template ESIMD_PRIVATE ESIMD_REGISTER(384) simd GRF; #define V(x, w, i) (x).template select(i) #define V1(x, i) V(x, 1, i) #define V8(x, i) V(x, 8, i) #define BCAST8(x, i) (x).template replicate_w<8, 1>(i) -template ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { - auto a = V(GRF, M * N, 0); +template +ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { + auto a = V(GRF, M * N, 0); for (int kk = 0; kk < N; kk += 8) { simd_mask<8> mask = 1; for (int k = 0; k < 8 && kk + k < N; k++) { @@ -67,7 +67,7 @@ template ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { V1(mask, k) = 0; if (ak0[k] != 0.0) { // scal - double temp = 1.0 / ak0[k]; + T temp = 1.0 / ak0[k]; ak0.merge(ak0 * temp, mask); for (int i = 8 + K + kk; i < M; i += 8) { V8(ak, i) *= temp; @@ -95,37 +95,39 @@ template ESIMD_INLINE void dgetrfnp_panel(int64_t *info) { // into GRF K - an update rank P0=A[0:M,0:K] = column(F=A[0:K,0:K], // L=A[K:M,0:K]) - panel to update with P1=A[0:M,K:K+N] = column(U=A[0:K,K:K+N], // T=A[K:M,K:K+N]) - panel to be updated -template -ESIMD_INLINE void dgetrfnp_left_step(double *a, int64_t lda, int64_t *info) { - auto p1 = V(GRF, M * N, 0); - double *a1; +template +ESIMD_INLINE void dgetrfnp_left_step(T *a, int64_t lda, int64_t *info) { + auto p1 = V(GRF, M * N, 0); + T *a1; int i, j, k; // load P1 for (j = 0, a1 = a + K * lda; j < N; j++, a1 += lda) for (i = 0; i < M; i += 8) { - simd data; + simd data; data.copy_from(a1 + i); V8(p1, j * M + i) = data; } // (getrf) factorize T=P*L*U - dgetrfnp_panel(info); + dgetrfnp_panel(info); // store P1 for (j = 0, a1 = a + K * lda; j < N; j++, a1 += lda) for (i = 0; i < M; i += 8) { - simd vals = V8(p1, j * M + i); + simd vals = V8(p1, j * M + i); vals.copy_to(a1 + i); } } -ESIMD_INLINE void dgetrfnp_esimd_8x8(double *a, int64_t lda, int64_t *ipiv, +template +ESIMD_INLINE void dgetrfnp_esimd_8x8(T *a, int64_t lda, int64_t *ipiv, int64_t *info) { *info = 0; - dgetrfnp_left_step<8, 8, 0>(a, lda, info); + dgetrfnp_left_step(a, lda, info); } -void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, +template +void dgetrfnp_batch_strided_c(int64_t m, int64_t n, T *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch, int64_t *info) { @@ -136,11 +138,11 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, CHECK(status = device.is_gpu(), !status); - double *a_gpu; + T *a_gpu; int64_t *ipiv_gpu; int64_t *info_gpu; - CHECK(a_gpu = static_cast( - malloc_shared(stride_a * batch * sizeof(double), device, context)), + CHECK(a_gpu = static_cast( + malloc_shared(stride_a * batch * sizeof(T), device, context)), !a_gpu); CHECK(ipiv_gpu = static_cast(malloc_shared( stride_ipiv * batch * sizeof(int64_t), device, context)), @@ -149,7 +151,7 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, malloc_shared(batch * sizeof(int64_t), device, context)), !info_gpu); - memcpy(a_gpu, a, stride_a * batch * sizeof(double)); + memcpy(a_gpu, a, stride_a * batch * sizeof(T)); sycl::nd_range<1> range(sycl::range<1>{static_cast(batch)}, sycl::range<1>{1}); @@ -171,7 +173,7 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, return; } - memcpy(a, a_gpu, stride_a * batch * sizeof(double)); + memcpy(a, a_gpu, stride_a * batch * sizeof(T)); memcpy(ipiv, ipiv_gpu, stride_ipiv * batch * sizeof(int64_t)); memcpy(info, info_gpu, batch * sizeof(int64_t)); @@ -180,14 +182,16 @@ void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, free(info_gpu, context); } -static void fp_init(int64_t m, int64_t n, double *a, int64_t lda) { +template +static void fp_init(int64_t m, int64_t n, T *a, int64_t lda) { int64_t i, j; for (j = 0; j < n; j++) for (i = 0; i < m; i++) - a[i + j * lda] = 2.0 * FP_RAND - 1.0; + a[i + j * lda] = 2.0 * ((T)rand() / (T)RAND_MAX) - 1.0; } -static void fp_copy(int64_t m, int64_t n, double *a, int64_t lda, double *b, +template +static void fp_copy(int64_t m, int64_t n, T *a, int64_t lda, T *b, int64_t ldb) { int64_t i, j; for (j = 0; j < n; j++) @@ -195,8 +199,9 @@ static void fp_copy(int64_t m, int64_t n, double *a, int64_t lda, double *b, b[i + j * ldb] = a[i + j * lda]; } -static double fp_norm1(int64_t m, int64_t n, double *a, int64_t lda) { - double sum, value = 0.0; +template +static T fp_norm1(int64_t m, int64_t n, T *a, int64_t lda) { + T sum, value = 0.0; int64_t i, j; for (j = 0; j < n; j++) { sum = 0.0; @@ -208,28 +213,35 @@ static double fp_norm1(int64_t m, int64_t n, double *a, int64_t lda) { return value; } -static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in, - double *a, int64_t lda, - int64_t stride_a, int64_t *ipiv, - int64_t stride_ipiv, int64_t batch, - int64_t *info) { - double thresh = 30.0; +template +static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, T *a_in, T *a, + int64_t lda, int64_t stride_a, + int64_t *ipiv, int64_t stride_ipiv, + int64_t batch, int64_t *info) { + T thresh = 30.0; int fail = 0; int64_t i, j, k, l; char label[1024]; - unsigned char prec_b[] = {0, 0, 0, 0, 0, 0, 0xb0, 0x3c}; - double res = 0.0, nrm = 0.0, ulp = *(double *)prec_b; - double *w = (double *)malloc(sizeof(double) * MAX(m * n, 1)); + unsigned char prec_b1[] = {0, 0, 0xb0, 0x3c}; + unsigned char prec_b2[] = {0, 0, 0, 0, 0, 0, 0xb0, 0x3c}; + T res = 0.0, nrm = 0.0, ulp; + if (std::is_same::value) { + ulp = *(T *)prec_b2; + } else { + ulp = *(T *)prec_b1; + }; + + T *w = (T *)malloc(sizeof(T) * MAX(m * n, 1)); sprintf(label, "m=%ld, n=%ld, lda=%ld, batch=%ld", m, n, lda, batch); for (k = 0; k < batch; k++) { /* info == 0 */ - CHECK_AND_REPORT("info == 0", label, info[k] != 0, (double)info[k], fail); + CHECK_AND_REPORT("info == 0", label, info[k] != 0, (T)info[k], fail); if (m > 0 && n > 0) { /* | L U - A | / ( |A| n ulp ) */ - memset(w, 0, sizeof(double) * m * n); + memset(w, 0, sizeof(T) * m * n); if (m < n) { for (j = 0; j < n; j++) for (i = 0; i <= j; i++) @@ -256,7 +268,7 @@ static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in, w[i + j * m] -= a_in[k * stride_a + i + j * lda]; res = fp_norm1(m, n, w, m); nrm = fp_norm1(m, n, &a_in[k * stride_a], lda); - nrm *= (double)n * ulp; + nrm *= (T)n * ulp; res /= nrm > 0.0 ? nrm : ulp; CHECK_AND_REPORT("| L U - A | / ( |A| n ulp )", label, FAILED(res, thresh), res, fail); @@ -267,12 +279,7 @@ static int dgetrfnp_batch_strided_check(int64_t m, int64_t n, double *a_in, return fail; } -void dgetrfnp_batch_strided_c(int64_t m, int64_t n, double *a, int64_t lda, - int64_t stride_a, int64_t *ipiv, - int64_t stride_ipiv, int64_t batch, - int64_t *info); - -int main(int argc, char *argv[]) { +template int check(int argc, char *argv[]) { int exit_status = 0; constexpr int64_t m = 8, n = 8, lda = 8; int64_t stride_a = lda * n, stride_ipiv = n; @@ -285,10 +292,10 @@ int main(int argc, char *argv[]) { int64_t a_count = MAX(stride_a * batch, 1); int64_t ipiv_count = MAX(stride_ipiv * batch, 1); int64_t info_count = MAX(batch, 1); - double *a = NULL, *a_copy = NULL; + T *a = NULL, *a_copy = NULL; int64_t *ipiv = NULL, *info = NULL; - CHECK(a = (double *)malloc(sizeof(double) * a_count), !a); - CHECK(a_copy = (double *)malloc(sizeof(double) * a_count), !a_copy); + CHECK(a = (T *)malloc(sizeof(T) * a_count), !a); + CHECK(a_copy = (T *)malloc(sizeof(T) * a_count), !a_copy); CHECK(ipiv = (int64_t *)malloc(sizeof(int64_t) * ipiv_count), !ipiv); CHECK(info = (int64_t *)malloc(sizeof(int64_t) * info_count), !info); @@ -313,3 +320,14 @@ int main(int argc, char *argv[]) { } return exit_status; } + +int main(int argc, char *argv[]) { + int Passed = 0; + + Passed += check(argc, argv); +#ifdef ENABLE_FP64 + Passed += check(argc, argv); +#endif + + return Passed; +} diff --git a/SYCL/ESIMD/regression/dgetrf_8x8_aspect_fp64.cpp b/SYCL/ESIMD/regression/dgetrf_8x8_aspect_fp64.cpp new file mode 100644 index 0000000000..6adfc7d568 --- /dev/null +++ b/SYCL/ESIMD/regression/dgetrf_8x8_aspect_fp64.cpp @@ -0,0 +1,18 @@ +//==------- dgetrf_8x8_aspect_fp64.cpp - DPC++ ESIMD on-device test -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: gpu, aspect-fp64 +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -I%S/.. -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out 1 +// +#include "dgetrf_8x8.cpp" diff --git a/SYCL/ESIMD/regression/dgetrf_aspect_fp64.cpp b/SYCL/ESIMD/regression/dgetrf_aspect_fp64.cpp new file mode 100644 index 0000000000..29c31249c1 --- /dev/null +++ b/SYCL/ESIMD/regression/dgetrf_aspect_fp64.cpp @@ -0,0 +1,18 @@ +//==--------- dgetrf_aspect_fp64.cpp - DPC++ ESIMD on-device test ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: gpu, aspect-fp64 +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -I%S/.. -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out 3 2 1 + +#include "Inputs/dgetrf.hpp" diff --git a/SYCL/ESIMD/regression/dgetrf_ref_aspect_fp64.cpp b/SYCL/ESIMD/regression/dgetrf_ref_aspect_fp64.cpp new file mode 100644 index 0000000000..bb91041317 --- /dev/null +++ b/SYCL/ESIMD/regression/dgetrf_ref_aspect_fp64.cpp @@ -0,0 +1,19 @@ +//==------- dgetrf_ref_aspect_fp64.cpp - DPC++ ESIMD on-device test -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: gpu, aspect-fp64 +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl -DUSE_REF -DENABLE_FP64 %s -I%S/.. -o %t.ref.out +// RUN: %GPU_RUN_PLACEHOLDER %t.ref.out 3 2 1 +// + +#include "Inputs/dgetrf.hpp" diff --git a/SYCL/ESIMD/spec_const/spec_const_double.cpp b/SYCL/ESIMD/spec_const/spec_const_double.cpp index 658164f702..8358a4fa82 100644 --- a/SYCL/ESIMD/spec_const/spec_const_double.cpp +++ b/SYCL/ESIMD/spec_const/spec_const_double.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: gpu +// REQUIRES: gpu, aspect-fp64 // RUN: %clangxx -fsycl -I%S/.. %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // UNSUPPORTED: cuda || hip diff --git a/SYCL/GroupAlgorithm/SYCL2020/sort.cpp b/SYCL/GroupAlgorithm/SYCL2020/sort.cpp index b3e3875b2c..73416de04a 100644 --- a/SYCL/GroupAlgorithm/SYCL2020/sort.cpp +++ b/SYCL/GroupAlgorithm/SYCL2020/sort.cpp @@ -358,10 +358,11 @@ int main(int argc, char *argv[]) { test_sort_by_type(q, sizes[i]); test_sort_by_type(q, sizes[i]); test_sort_by_type(q, sizes[i]); - test_sort_by_type(q, sizes[i]); test_sort_by_type(q, sizes[i]); - test_custom_type(q, sizes[i]); +#ifdef ENABLE_FP64 + test_sort_by_type(q, sizes[i]); +#endif } std::cout << "Test passed." << std::endl; } diff --git a/SYCL/GroupAlgorithm/SYCL2020/sort_aspect_fp64.cpp b/SYCL/GroupAlgorithm/SYCL2020/sort_aspect_fp64.cpp new file mode 100644 index 0000000000..95137d2200 --- /dev/null +++ b/SYCL/GroupAlgorithm/SYCL2020/sort_aspect_fp64.cpp @@ -0,0 +1,12 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -I . -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include "sort.cpp" diff --git a/SYCL/InlineAsm/asm_float_add.cpp b/SYCL/InlineAsm/asm_float_add.cpp index a9a7edd16f..f8aa0d6679 100644 --- a/SYCL/InlineAsm/asm_float_add.cpp +++ b/SYCL/InlineAsm/asm_float_add.cpp @@ -9,9 +9,7 @@ #include #include -using dataType = sycl::cl_double; - -template +template struct KernelFunctor : WithInputBuffers, WithOutputBuffer { KernelFunctor(const std::vector &input1, const std::vector &input2) : WithInputBuffers(input1, input2), WithOutputBuffer( @@ -42,26 +40,36 @@ struct KernelFunctor : WithInputBuffers, WithOutputBuffer { } }; -int main() { - std::vector inputA(DEFAULT_PROBLEM_SIZE), - inputB(DEFAULT_PROBLEM_SIZE); +template bool check() { + std::vector inputA(DEFAULT_PROBLEM_SIZE), inputB(DEFAULT_PROBLEM_SIZE); for (int i = 0; i < DEFAULT_PROBLEM_SIZE; i++) { - inputA[i] = (double)1 / std::pow(2, i); - inputB[i] = (double)2 / std::pow(2, i); + inputA[i] = (T1)1 / std::pow(2, i); + inputB[i] = (T1)2 / std::pow(2, i); } - KernelFunctor<> f(inputA, inputB); + KernelFunctor f(inputA, inputB); if (!launchInlineASMTest(f)) - return 0; + return true; auto &C = f.getOutputBufferData(); for (int i = 0; i < DEFAULT_PROBLEM_SIZE; i++) { if (C[i] != inputA[i] + inputB[i]) { std::cerr << "At index: " << i << ". "; std::cerr << C[i] << " != " << inputA[i] + inputB[i] << "\n"; - return 1; + return false; } } - return 0; + return true; +} + +int main() { + bool Passed = true; + + Passed &= check(); +#ifdef ENABLE_FP64 + Passed &= check(); +#endif + + return Passed ? 0 : 1; } diff --git a/SYCL/InlineAsm/asm_float_add_aspect_fp64.cpp b/SYCL/InlineAsm/asm_float_add_aspect_fp64.cpp new file mode 100644 index 0000000000..b923f9a5cd --- /dev/null +++ b/SYCL/InlineAsm/asm_float_add_aspect_fp64.cpp @@ -0,0 +1,11 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// UNSUPPORTED: cuda || hip_nvidia +// REQUIRES: gpu,linux,aspect-fp64 +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "asm_float_add.cpp" diff --git a/SYCL/InlineAsm/asm_float_imm_arg.cpp b/SYCL/InlineAsm/asm_float_imm_arg.cpp index 13aae5c455..3946beaefa 100644 --- a/SYCL/InlineAsm/asm_float_imm_arg.cpp +++ b/SYCL/InlineAsm/asm_float_imm_arg.cpp @@ -9,13 +9,12 @@ #include #include -constexpr double IMM_ARGUMENT = 0.5; -using dataType = sycl::cl_double; +template constexpr T IMM_ARGUMENT = T(0.5); -template -struct KernelFunctor : WithInputBuffers, WithOutputBuffer { - KernelFunctor(const std::vector &input) - : WithInputBuffers(input), WithOutputBuffer(input.size()) {} +template +struct KernelFunctor : WithInputBuffers, WithOutputBuffer { + KernelFunctor(const std::vector &input) + : WithInputBuffers(input), WithOutputBuffer(input.size()) {} void operator()(sycl::handler &cgh) { auto A = @@ -25,36 +24,49 @@ struct KernelFunctor : WithInputBuffers, WithOutputBuffer { this->getOutputBuffer().template get_access( cgh); - cgh.parallel_for>( + cgh.parallel_for>( sycl::range<1>{this->getOutputBufferSize()}, [=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] { #if defined(__SYCL_DEVICE_ONLY__) asm("mul (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2" : "=rw"(B[wiID]) - : "rw"(A[wiID]), "i"(IMM_ARGUMENT)); + : "rw"(A[wiID]), "i"(IMM_ARGUMENT)); #else - B[wiID] = A[wiID] * IMM_ARGUMENT; + B[wiID] = A[wiID] * IMM_ARGUMENT; #endif }); } }; -int main() { - std::vector input(DEFAULT_PROBLEM_SIZE); +template bool check() { + constexpr T1 IMM_ARGUMENT = T1(0.5); + + std::vector input(DEFAULT_PROBLEM_SIZE); for (int i = 0; i < DEFAULT_PROBLEM_SIZE; i++) - input[i] = (double)1 / std::pow(2, i); + input[i] = (T1)1 / std::pow(2, i); - KernelFunctor<> f(input); + KernelFunctor f(input); if (!launchInlineASMTest(f)) - return 0; + return true; auto &B = f.getOutputBufferData(); for (int i = 0; i < DEFAULT_PROBLEM_SIZE; ++i) { if (B[i] != input[i] * IMM_ARGUMENT) { std::cerr << "At index: " << i << ". "; std::cerr << B[i] << " != " << input[i] * IMM_ARGUMENT << "\n"; - return 1; + return false; } } - return 0; + return true; +} + +int main() { + bool Passed = true; + + Passed &= check(); +#ifdef ENABLE_FP64 + Passed &= check(); +#endif + + return Passed ? 0 : 1; } diff --git a/SYCL/InlineAsm/asm_float_imm_arg_aspect_fp64.cpp b/SYCL/InlineAsm/asm_float_imm_arg_aspect_fp64.cpp new file mode 100644 index 0000000000..a837defb76 --- /dev/null +++ b/SYCL/InlineAsm/asm_float_imm_arg_aspect_fp64.cpp @@ -0,0 +1,11 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// UNSUPPORTED: cuda || hip_nvidia +// REQUIRES: gpu,linux,aspect-fp64 +// RUN: %clangxx -fsycl -DENABLE_FP64 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "asm_float_imm_arg.cpp" diff --git a/SYCL/KernelParams/union_kernel_param.cpp b/SYCL/KernelParams/union_kernel_param.cpp index 0cacf310a8..c1f6ea1ca5 100644 --- a/SYCL/KernelParams/union_kernel_param.cpp +++ b/SYCL/KernelParams/union_kernel_param.cpp @@ -9,32 +9,43 @@ #include #include -union TestUnion { +template union TestUnion { public: int myint; char mychar; - double mydouble; + T mytype; - TestUnion() { mydouble = 0.0; }; + TestUnion() { mytype = 0.0; }; }; -int main(int argc, char **argv) { - TestUnion x; - x.mydouble = 5.0; - double mydouble = 0.0; +template bool check() { + TestUnion x; + x.mytype = 5.0; + T mytype = 0.0; sycl::queue queue; { - sycl::buffer buf(&mydouble, 1); + sycl::buffer buf(&mytype, 1); queue.submit([&](sycl::handler &cgh) { - auto acc = buf.get_access(cgh); - cgh.single_task([=]() { acc[0] = x.mydouble; }); + auto acc = buf.template get_access(cgh); + cgh.single_task([=]() { acc[0] = x.mytype; }); }); } - if (mydouble != 5.0) { - printf("FAILED\nmydouble = %d\n", mydouble); - return 1; + if (mytype != 5.0) { + printf("FAILED\nmytype = %d\n", mytype); + return false; } - return 0; + return true; +} + +int main(int argc, char **argv) { + bool Passed = true; + + Passed &= check(); +#ifdef ENABLE_FP64 + Passed &= check(); +#endif + + return Passed ? 0 : 1; } diff --git a/SYCL/KernelParams/union_kernel_param_aspect_fp64.cpp b/SYCL/KernelParams/union_kernel_param_aspect_fp64.cpp new file mode 100644 index 0000000000..de5afb7dfa --- /dev/null +++ b/SYCL/KernelParams/union_kernel_param_aspect_fp64.cpp @@ -0,0 +1,12 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. + +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +#include "union_kernel_param.cpp" diff --git a/SYCL/Regression/commandlist/Inputs/FindPrimesSYCL.cpp b/SYCL/Regression/commandlist/Inputs/FindPrimesSYCL.cpp index 0057048134..c7ff46673d 100644 --- a/SYCL/Regression/commandlist/Inputs/FindPrimesSYCL.cpp +++ b/SYCL/Regression/commandlist/Inputs/FindPrimesSYCL.cpp @@ -51,7 +51,7 @@ float find_prime_s(work *w) { if (number < N) { for (size_t i = 0; i < niter; ++i) { bool is_prime = !(number % 2 == 0); - const int upper_bound = sycl::sqrt(1.0 * number) + 1; + const int upper_bound = sycl::sqrt(1.0f * number) + 1; int k = 3; while (k < upper_bound && is_prime) { is_prime = !(number % k == 0); diff --git a/SYCL/SpecConstants/2020/handler-api.cpp b/SYCL/SpecConstants/2020/handler-api.cpp index f9df9b82a1..2c3f8d8065 100644 --- a/SYCL/SpecConstants/2020/handler-api.cpp +++ b/SYCL/SpecConstants/2020/handler-api.cpp @@ -23,7 +23,9 @@ constexpr sycl::specialization_id int_id; constexpr sycl::specialization_id int_id2(2); +#ifdef ENABLE_FP64 constexpr sycl::specialization_id double_id(3.14); +#endif constexpr sycl::specialization_id custom_type_id; class TestDefaultValuesKernel; @@ -71,19 +73,25 @@ int main() { bool test_default_values(sycl::queue q) { sycl::buffer int_buffer(1); sycl::buffer int_buffer2(1); +#ifdef ENABLE_FP64 sycl::buffer double_buffer(1); +#endif sycl::buffer custom_type_buffer(1); q.submit([&](sycl::handler &cgh) { auto int_acc = int_buffer.get_access(cgh); auto int_acc2 = int_buffer2.get_access(cgh); +#ifdef ENABLE_FP64 auto double_acc = double_buffer.get_access(cgh); +#endif auto custom_type_acc = custom_type_buffer.get_access(cgh); cgh.single_task([=](sycl::kernel_handler kh) { int_acc[0] = kh.get_specialization_constant(); int_acc2[0] = kh.get_specialization_constant(); +#ifdef ENABLE_FP64 double_acc[0] = kh.get_specialization_constant(); +#endif custom_type_acc[0] = kh.get_specialization_constant(); }); }); @@ -98,9 +106,11 @@ bool test_default_values(sycl::queue q) { if (!check_value(2, int_acc2[0], "integer specialization constant")) return false; +#ifdef ENABLE_FP64 auto double_acc = double_buffer.get_access(); if (!check_value(3.14, double_acc[0], "double specialization constant")) return false; +#endif auto custom_type_acc = custom_type_buffer.get_access(); @@ -120,9 +130,11 @@ bool test_set_and_get_on_host(sycl::queue q) { "integer specializaiton constant before setting any value")) ++errors; +#ifdef ENABLE_FP64 if (!check_value(3.14, cgh.get_specialization_constant(), "double specializaiton constant before setting any value")) ++errors; +#endif custom_type custom_type_ref; if (!check_value( @@ -131,10 +143,15 @@ bool test_set_and_get_on_host(sycl::queue q) { ++errors; int new_int_value = 8; +#ifdef ENABLE_FP64 double new_double_value = 3.0; +#endif custom_type new_custom_type_value('b', 1.0, 12); + cgh.set_specialization_constant(new_int_value); +#ifdef ENABLE_FP64 cgh.set_specialization_constant(new_double_value); +#endif cgh.set_specialization_constant(new_custom_type_value); if (!check_value( @@ -142,10 +159,12 @@ bool test_set_and_get_on_host(sycl::queue q) { "integer specializaiton constant after setting a new value")) ++errors; +#ifdef ENABLE_FP64 if (!check_value( new_double_value, cgh.get_specialization_constant(), "double specializaiton constant after setting a new value")) ++errors; +#endif if (!check_value( new_custom_type_value, @@ -162,30 +181,40 @@ bool test_set_and_get_on_host(sycl::queue q) { bool test_set_and_get_on_device(sycl::queue q) { sycl::buffer int_buffer(1); sycl::buffer int_buffer2(1); +#ifdef ENABLE_FP64 sycl::buffer double_buffer(1); +#endif sycl::buffer custom_type_buffer(1); int new_int_value = 8; int new_int_value2 = 0; +#ifdef ENABLE_FP64 double new_double_value = 3.0; +#endif custom_type new_custom_type_value('b', 1.0, 12); q.submit([&](sycl::handler &cgh) { auto int_acc = int_buffer.get_access(cgh); auto int_acc2 = int_buffer2.get_access(cgh); +#ifdef ENABLE_FP64 auto double_acc = double_buffer.get_access(cgh); +#endif auto custom_type_acc = custom_type_buffer.get_access(cgh); cgh.set_specialization_constant(new_int_value); - cgh.set_specialization_constant(new_int_value2); +#ifdef ENABLE_FP64 cgh.set_specialization_constant(new_double_value); +#endif + cgh.set_specialization_constant(new_int_value2); cgh.set_specialization_constant(new_custom_type_value); cgh.single_task([=](sycl::kernel_handler kh) { int_acc[0] = kh.get_specialization_constant(); int_acc2[0] = kh.get_specialization_constant(); +#ifdef ENABLE_FP64 double_acc[0] = kh.get_specialization_constant(); +#endif custom_type_acc[0] = kh.get_specialization_constant(); }); }); @@ -200,10 +229,12 @@ bool test_set_and_get_on_device(sycl::queue q) { "integer specialization constant")) return false; +#ifdef ENABLE_FP64 auto double_acc = double_buffer.get_access(); if (!check_value(new_double_value, double_acc[0], "double specialization constant")) return false; +#endif auto custom_type_acc = custom_type_buffer.get_access(); diff --git a/SYCL/SpecConstants/2020/handler-api_aspect_fp64.cpp b/SYCL/SpecConstants/2020/handler-api_aspect_fp64.cpp new file mode 100644 index 0000000000..3c4b3edc8a --- /dev/null +++ b/SYCL/SpecConstants/2020/handler-api_aspect_fp64.cpp @@ -0,0 +1,15 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out \ +// RUN: -fsycl-dead-args-optimization +// FIXME: SYCL 2020 specialization constants are not supported on host device +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// FIXME: ACC devices use emulation path, which is not yet supported +// UNSUPPORTED: hip + +#include "handler-api.cpp" diff --git a/SYCL/SpecConstants/2020/kernel-bundle-api.cpp b/SYCL/SpecConstants/2020/kernel-bundle-api.cpp index 900da6fe09..09c4a24a74 100644 --- a/SYCL/SpecConstants/2020/kernel-bundle-api.cpp +++ b/SYCL/SpecConstants/2020/kernel-bundle-api.cpp @@ -21,8 +21,10 @@ #include "common.hpp" constexpr sycl::specialization_id int_id; -constexpr sycl::specialization_id double_id(3.14); constexpr sycl::specialization_id custom_type_id; +#ifdef ENABLE_FP64 +constexpr sycl::specialization_id double_id(3.14); +#endif class TestDefaultValuesKernel; class EmptyKernel; @@ -76,9 +78,10 @@ bool test_default_values(sycl::queue q) { } sycl::buffer int_buffer(1); - sycl::buffer double_buffer(1); sycl::buffer custom_type_buffer(1); - +#ifdef ENABLE_FP64 + sycl::buffer double_buffer(1); +#endif auto input_bundle = sycl::get_kernel_bundle(q.get_context()); auto exec_bundle = sycl::build(input_bundle); @@ -86,33 +89,36 @@ bool test_default_values(sycl::queue q) { q.submit([&](sycl::handler &cgh) { cgh.use_kernel_bundle(exec_bundle); auto int_acc = int_buffer.get_access(cgh); - auto double_acc = double_buffer.get_access(cgh); auto custom_type_acc = custom_type_buffer.get_access(cgh); +#ifdef ENABLE_FP64 + auto double_acc = double_buffer.get_access(cgh); +#endif cgh.single_task([=](sycl::kernel_handler kh) { int_acc[0] = kh.get_specialization_constant(); - double_acc[0] = kh.get_specialization_constant(); custom_type_acc[0] = kh.get_specialization_constant(); +#ifdef ENABLE_FP64 + double_acc[0] = kh.get_specialization_constant(); +#endif }); }); - auto int_acc = int_buffer.get_access(); if (!check_value( 0, int_acc[0], "integer specialization constant (defined without default value)")) return false; - auto double_acc = double_buffer.get_access(); - if (!check_value(3.14, double_acc[0], "double specialization constant")) - return false; - auto custom_type_acc = custom_type_buffer.get_access(); const custom_type custom_type_ref; if (!check_value(custom_type_ref, custom_type_acc[0], "custom_type specialization constant")) return false; - +#ifdef ENABLE_FP64 + auto double_acc = double_buffer.get_access(); + if (!check_value(3.14, double_acc[0], "double specialization constant")) + return false; +#endif return true; } @@ -138,34 +144,37 @@ bool test_set_and_get_on_host(sycl::queue q) { << std::endl; return false; } - // Check default values if (!check_value( 0, input_bundle.get_specialization_constant(), "integer specializaiton constant before setting any value")) ++errors; - if (!check_value(3.14, - input_bundle.get_specialization_constant(), - "double specializaiton constant before setting any value")) - ++errors; - custom_type custom_type_ref; if (!check_value( custom_type_ref, input_bundle.get_specialization_constant(), "custom_type specializaiton constant before setting any value")) ++errors; - +#ifdef ENABLE_FP64 + if (!check_value(3.14, + input_bundle.get_specialization_constant(), + "double specializaiton constant before setting any value")) + ++errors; +#endif // Update values int new_int_value = 42; - double new_double_value = 3.0; custom_type new_custom_type_value('b', 1.0, 12); +#ifdef ENABLE_FP64 + double new_double_value = 3.0; +#endif input_bundle.set_specialization_constant(new_int_value); - input_bundle.set_specialization_constant(new_double_value); input_bundle.set_specialization_constant( new_custom_type_value); +#ifdef ENABLE_FP64 + input_bundle.set_specialization_constant(new_double_value); +#endif // And re-check them again if (!check_value( @@ -173,16 +182,17 @@ bool test_set_and_get_on_host(sycl::queue q) { "integer specializaiton constant after setting a new value")) ++errors; - if (!check_value(new_double_value, - input_bundle.get_specialization_constant(), - "double specializaiton constant after setting a value")) - ++errors; - if (!check_value( new_custom_type_value, input_bundle.get_specialization_constant(), "custom_type specializaiton constant after setting a new value")) ++errors; +#ifdef ENABLE_FP64 + if (!check_value(new_double_value, + input_bundle.get_specialization_constant(), + "double specializaiton constant after setting a value")) + ++errors; +#endif // Let's try to build the bundle auto exec_bundle = sycl::build(input_bundle); @@ -193,15 +203,16 @@ bool test_set_and_get_on_host(sycl::queue q) { "integer specializaiton constant after build")) ++errors; - if (!check_value(new_double_value, - exec_bundle.get_specialization_constant(), - "double specializaiton constant after build")) - ++errors; - if (!check_value(new_custom_type_value, exec_bundle.get_specialization_constant(), "custom_type specializaiton constant after build")) ++errors; +#ifdef ENABLE_FP64 + if (!check_value(new_double_value, + exec_bundle.get_specialization_constant(), + "double specializaiton constant after build")) + ++errors; +#endif } catch (sycl::exception &e) { } @@ -210,50 +221,57 @@ bool test_set_and_get_on_host(sycl::queue q) { bool test_set_and_get_on_device(sycl::queue q) { sycl::buffer int_buffer(1); - sycl::buffer double_buffer(1); sycl::buffer custom_type_buffer(1); +#ifdef ENABLE_FP64 + sycl::buffer double_buffer(1); +#endif int new_int_value = 42; - double new_double_value = 3.0; custom_type new_custom_type_value('b', 1.0, 12); +#ifdef ENABLE_FP64 + double new_double_value = 3.0; +#endif auto input_bundle = sycl::get_kernel_bundle(q.get_context()); input_bundle.set_specialization_constant(new_int_value); - input_bundle.set_specialization_constant(new_double_value); input_bundle.set_specialization_constant( new_custom_type_value); +#ifdef ENABLE_FP64 + input_bundle.set_specialization_constant(new_double_value); +#endif auto exec_bundle = sycl::build(input_bundle); - q.submit([&](sycl::handler &cgh) { cgh.use_kernel_bundle(exec_bundle); auto int_acc = int_buffer.get_access(cgh); - auto double_acc = double_buffer.get_access(cgh); auto custom_type_acc = custom_type_buffer.get_access(cgh); - +#ifdef ENABLE_FP64 + auto double_acc = double_buffer.get_access(cgh); +#endif cgh.single_task([=](sycl::kernel_handler kh) { int_acc[0] = kh.get_specialization_constant(); - double_acc[0] = kh.get_specialization_constant(); custom_type_acc[0] = kh.get_specialization_constant(); +#ifdef ENABLE_FP64 + double_acc[0] = kh.get_specialization_constant(); +#endif }); }); - auto int_acc = int_buffer.get_access(); if (!check_value(new_int_value, int_acc[0], "integer specialization constant")) return false; - auto double_acc = double_buffer.get_access(); - if (!check_value(new_double_value, double_acc[0], - "double specialization constant")) - return false; - auto custom_type_acc = custom_type_buffer.get_access(); if (!check_value(new_custom_type_value, custom_type_acc[0], "custom_type specialization constant")) return false; - +#ifdef ENABLE_FP64 + auto double_acc = double_buffer.get_access(); + if (!check_value(new_double_value, double_acc[0], + "double specialization constant")) + return false; +#endif return true; } diff --git a/SYCL/SpecConstants/2020/kernel-bundle-api_aspect_fp64.cpp b/SYCL/SpecConstants/2020/kernel-bundle-api_aspect_fp64.cpp new file mode 100644 index 0000000000..e6bc742141 --- /dev/null +++ b/SYCL/SpecConstants/2020/kernel-bundle-api_aspect_fp64.cpp @@ -0,0 +1,15 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out \ +// RUN: -fsycl-dead-args-optimization +// FIXME: SYCL 2020 specialization constants are not supported on host device +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// FIXME: ACC devices use emulation path, which is not yet supported +// UNSUPPORTED: hip + +#include "kernel-bundle-api.cpp" diff --git a/SYCL/SubGroup/barrier.cpp b/SYCL/SubGroup/barrier.cpp index 050424b359..3b50c0198f 100644 --- a/SYCL/SubGroup/barrier.cpp +++ b/SYCL/SubGroup/barrier.cpp @@ -86,10 +86,12 @@ int main() { check(Queue); check(Queue); check(Queue); +#ifdef ENABLE_FP64 if (Queue.get_device().has(sycl::aspect::fp64)) { check(Queue); check(Queue); } +#endif std::cout << "Test passed." << std::endl; return 0; } diff --git a/SYCL/SubGroup/barrier_aspect_fp64.cpp b/SYCL/SubGroup/barrier_aspect_fp64.cpp new file mode 100644 index 0000000000..bb720a7087 --- /dev/null +++ b/SYCL/SubGroup/barrier_aspect_fp64.cpp @@ -0,0 +1,19 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +//==-- barrier_aspect_fp64.cpp - SYCL sub_group barrier test ---*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "barrier.cpp" diff --git a/SYCL/SubGroup/broadcast_fp64.cpp b/SYCL/SubGroup/broadcast_fp64.cpp index 46af65d9e1..4a4e5f4ba4 100644 --- a/SYCL/SubGroup/broadcast_fp64.cpp +++ b/SYCL/SubGroup/broadcast_fp64.cpp @@ -1,3 +1,4 @@ +// REQUIRES: aspect-fp64 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/SubGroup/generic-shuffle.cpp b/SYCL/SubGroup/generic-shuffle.cpp index e979ddfa85..2ad91b1e49 100644 --- a/SYCL/SubGroup/generic-shuffle.cpp +++ b/SYCL/SubGroup/generic-shuffle.cpp @@ -224,13 +224,13 @@ int main() { }; check_struct>( Queue, ComplexFloatGenerator); - +#ifdef ENABLE_FP64 auto ComplexDoubleGenerator = [state = std::complex(0, 1)]() mutable { return state += std::complex(2, 2); }; check_struct>( Queue, ComplexDoubleGenerator); - +#endif std::cout << "Test passed." << std::endl; return 0; } diff --git a/SYCL/SubGroup/generic-shuffle_aspect_fp64.cpp b/SYCL/SubGroup/generic-shuffle_aspect_fp64.cpp new file mode 100644 index 0000000000..fef972a51c --- /dev/null +++ b/SYCL/SubGroup/generic-shuffle_aspect_fp64.cpp @@ -0,0 +1,20 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// +//==-------------- generic_shuffle_aspect-fp64.cpp -*- C++ -*---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "generic-shuffle.cpp" diff --git a/SYCL/SubGroup/info.cpp b/SYCL/SubGroup/info.cpp index 74e6349849..e44afdd5ad 100644 --- a/SYCL/SubGroup/info.cpp +++ b/SYCL/SubGroup/info.cpp @@ -40,7 +40,11 @@ int main() { auto Kernel = KB.get_kernel(KernelID); range<2> GlobalRange{50, 40}; +#ifdef ENABLE_FP64 buffer ABuf{GlobalRange}, BBuf{GlobalRange}, CBuf{GlobalRange}; +#else + buffer ABuf{GlobalRange}, BBuf{GlobalRange}, CBuf{GlobalRange}; +#endif Queue.submit([&](sycl::handler &cgh) { auto A = ABuf.get_access(cgh); diff --git a/SYCL/SubGroup/info_aspect_fp64.cpp b/SYCL/SubGroup/info_aspect_fp64.cpp new file mode 100644 index 0000000000..b7a1589798 --- /dev/null +++ b/SYCL/SubGroup/info_aspect_fp64.cpp @@ -0,0 +1,16 @@ +// See https://github.com/intel/llvm/issues/2922 for more info +// UNSUPPORTED: cuda || hip +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +//==------------- info.cpp - SYCL sub_group parameters test ----*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "info.cpp" diff --git a/SYCL/SubGroup/load_store.cpp b/SYCL/SubGroup/load_store.cpp index 9a1ea2a9f0..7033a62813 100644 --- a/SYCL/SubGroup/load_store.cpp +++ b/SYCL/SubGroup/load_store.cpp @@ -262,6 +262,7 @@ int main() { check(Queue); check(Queue); check(Queue); +#ifdef ENABLE_FP64 typedef double aligned_double __attribute__((aligned(16))); check(Queue); check(Queue); @@ -270,6 +271,7 @@ int main() { check(Queue); check(Queue); check(Queue); +#endif } std::cout << "Test passed." << std::endl; return 0; diff --git a/SYCL/SubGroup/load_store_aspect_fp64.cpp b/SYCL/SubGroup/load_store_aspect_fp64.cpp new file mode 100644 index 0000000000..02b184d89e --- /dev/null +++ b/SYCL/SubGroup/load_store_aspect_fp64.cpp @@ -0,0 +1,24 @@ +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// +// Missing __spirv_SubgroupBlockReadINTEL, __spirv_SubgroupBlockWriteINTEL on +// AMD +// XFAIL: hip_amd +// +//==----- load_store_aspect_fp64.cpp - SYCL sub_group load/store test ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "load_store.cpp" diff --git a/SYCL/SubGroup/shuffle_fp64.cpp b/SYCL/SubGroup/shuffle_fp64.cpp index cc6baeb6e3..87bcaa02b9 100644 --- a/SYCL/SubGroup/shuffle_fp64.cpp +++ b/SYCL/SubGroup/shuffle_fp64.cpp @@ -1,3 +1,4 @@ +// REQUIRES: aspect-fp64 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/USM/copy.cpp b/SYCL/USM/copy.cpp index 2613326756..3d9786548f 100644 --- a/SYCL/USM/copy.cpp +++ b/SYCL/USM/copy.cpp @@ -28,12 +28,19 @@ struct test_struct { long long d; half e; float f; +#ifdef ENABLE_FP64 double g; +#endif }; bool operator==(const test_struct &lhs, const test_struct &rhs) { +#ifdef ENABLE_FP64 return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g; +#else + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.e == rhs.e && lhs.f == rhs.f; +#endif } template T *regular(queue q, alloc kind) { @@ -88,74 +95,90 @@ int main() { queue q; auto dev = q.get_device(); +#ifdef ENABLE_FP64 test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242f, 4.24242}; +#else + test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242f}; +#endif if (dev.has(aspect::usm_host_allocations)) { +#ifdef ENABLE_FP64 + runTests(q, 4.24242, alloc::host, alloc::host); +#endif runTests(q, 4, alloc::host, alloc::host); runTests(q, 42, alloc::host, alloc::host); runTests(q, 424, alloc::host, alloc::host); runTests(q, 4242, alloc::host, alloc::host); runTests(q, half(4.2f), alloc::host, alloc::host); runTests(q, 4.242f, alloc::host, alloc::host); - runTests(q, 4.24242, alloc::host, alloc::host); runTests(q, test_obj, alloc::host, alloc::host); } if (dev.has(aspect::usm_shared_allocations)) { +#ifdef ENABLE_FP64 + runTests(q, 4.24242, alloc::shared, alloc::shared); +#endif runTests(q, 4, alloc::shared, alloc::shared); runTests(q, 42, alloc::shared, alloc::shared); runTests(q, 424, alloc::shared, alloc::shared); runTests(q, 4242, alloc::shared, alloc::shared); runTests(q, half(4.2f), alloc::shared, alloc::shared); runTests(q, 4.242f, alloc::shared, alloc::shared); - runTests(q, 4.24242, alloc::shared, alloc::shared); runTests(q, test_obj, alloc::shared, alloc::shared); } if (dev.has(aspect::usm_device_allocations)) { +#ifdef ENABLE_FP64 + runTests(q, 4.24242, alloc::device, alloc::device); +#endif runTests(q, 4, alloc::device, alloc::device); runTests(q, 42, alloc::device, alloc::device); runTests(q, 424, alloc::device, alloc::device); runTests(q, 4242, alloc::device, alloc::device); runTests(q, half(4.2f), alloc::device, alloc::device); runTests(q, 4.242f, alloc::device, alloc::device); - runTests(q, 4.24242, alloc::device, alloc::device); runTests(q, test_obj, alloc::device, alloc::device); } if (dev.has(aspect::usm_host_allocations) && dev.has(aspect::usm_shared_allocations)) { +#ifdef ENABLE_FP64 + runTests(q, 4.24242, alloc::host, alloc::shared); +#endif runTests(q, 4, alloc::host, alloc::shared); runTests(q, 42, alloc::host, alloc::shared); runTests(q, 424, alloc::host, alloc::shared); runTests(q, 4242, alloc::host, alloc::shared); runTests(q, half(4.2f), alloc::host, alloc::shared); runTests(q, 4.242f, alloc::host, alloc::shared); - runTests(q, 4.24242, alloc::host, alloc::shared); runTests(q, test_obj, alloc::host, alloc::shared); } if (dev.has(aspect::usm_host_allocations) && dev.has(aspect::usm_device_allocations)) { +#ifdef ENABLE_FP64 + runTests(q, 4.24242, alloc::host, alloc::device); +#endif runTests(q, 4, alloc::host, alloc::device); runTests(q, 42, alloc::host, alloc::device); runTests(q, 424, alloc::host, alloc::device); runTests(q, 4242, alloc::host, alloc::device); runTests(q, half(4.2f), alloc::host, alloc::device); runTests(q, 4.242f, alloc::host, alloc::device); - runTests(q, 4.24242, alloc::host, alloc::device); runTests(q, test_obj, alloc::host, alloc::device); } if (dev.has(aspect::usm_shared_allocations) && dev.has(aspect::usm_device_allocations)) { +#ifdef ENABLE_FP64 + runTests(q, 4.24242, alloc::shared, alloc::device); +#endif runTests(q, 4, alloc::shared, alloc::device); runTests(q, 42, alloc::shared, alloc::device); runTests(q, 424, alloc::shared, alloc::device); runTests(q, 4242, alloc::shared, alloc::device); runTests(q, half(4.2f), alloc::shared, alloc::device); runTests(q, 4.242f, alloc::shared, alloc::device); - runTests(q, 4.24242, alloc::shared, alloc::device); runTests(q, test_obj, alloc::shared, alloc::device); } diff --git a/SYCL/USM/copy_aspect_fp64.cpp b/SYCL/USM/copy_aspect_fp64.cpp new file mode 100644 index 0000000000..b02bbbf52a --- /dev/null +++ b/SYCL/USM/copy_aspect_fp64.cpp @@ -0,0 +1,21 @@ +//==--------------- copy_aspect-fp64.cp - USM copy test --------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t1.out +// RUN: %HOST_RUN_PLACEHOLDER %t1.out +// RUN: %CPU_RUN_PLACEHOLDER %t1.out +// RUN: %GPU_RUN_PLACEHOLDER %t1.out +// RUN: %ACC_RUN_PLACEHOLDER %t1.out + +#include "copy.cpp"; diff --git a/SYCL/USM/fill.cpp b/SYCL/USM/fill.cpp index c3b96abcad..2f37ad58d2 100644 --- a/SYCL/USM/fill.cpp +++ b/SYCL/USM/fill.cpp @@ -28,12 +28,19 @@ struct test_struct { long long d; sycl::half e; float f; +#ifdef ENABLE_FP64 double g; +#endif }; bool operator==(const test_struct &lhs, const test_struct &rhs) { +#ifdef ENABLE_FP64 return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g; +#else + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.e == rhs.e && lhs.f == rhs.f; +#endif } template @@ -125,38 +132,48 @@ int main() { auto dev = q.get_device(); auto ctxt = q.get_context(); +#ifdef ENABLE_FP64 test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; +#else + test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242}; +#endif if (dev.get_info()) { +#ifdef ENABLE_FP64 + runHostTests(dev, ctxt, q, 4.24242); +#endif runHostTests(dev, ctxt, q, 4); runHostTests(dev, ctxt, q, 42); runHostTests(dev, ctxt, q, 424); runHostTests(dev, ctxt, q, 4242); runHostTests(dev, ctxt, q, sycl::half(4.2f)); runHostTests(dev, ctxt, q, 4.242f); - runHostTests(dev, ctxt, q, 4.24242); runHostTests(dev, ctxt, q, test_obj); } if (dev.get_info()) { +#ifdef ENABLE_FP64 + runSharedTests(dev, ctxt, q, 4.24242); +#endif runSharedTests(dev, ctxt, q, 4); runSharedTests(dev, ctxt, q, 42); runSharedTests(dev, ctxt, q, 424); runSharedTests(dev, ctxt, q, 4242); runSharedTests(dev, ctxt, q, sycl::half(4.2f)); runSharedTests(dev, ctxt, q, 4.242f); - runSharedTests(dev, ctxt, q, 4.24242); runSharedTests(dev, ctxt, q, test_obj); } if (dev.get_info()) { +#ifdef ENABLE_FP64 + runDeviceTests(dev, ctxt, q, 4.24242); +#endif runDeviceTests(dev, ctxt, q, 4); runDeviceTests(dev, ctxt, q, 42); runDeviceTests(dev, ctxt, q, 420); runDeviceTests(dev, ctxt, q, 4242); runDeviceTests(dev, ctxt, q, sycl::half(4.2f)); runDeviceTests(dev, ctxt, q, 4.242f); - runDeviceTests(dev, ctxt, q, 4.24242); runDeviceTests(dev, ctxt, q, test_obj); } diff --git a/SYCL/USM/fill_aspect_fp64.cpp b/SYCL/USM/fill_aspect_fp64.cpp new file mode 100644 index 0000000000..c542bd7c9e --- /dev/null +++ b/SYCL/USM/fill_aspect_fp64.cpp @@ -0,0 +1,21 @@ +//==------- fill_aspect_fp64.cpp - USM fill test for double type -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Enable FP64 part of . To be removed once DPC++ +// supports optional device features and the code could be enabled +// unconditionally without causing failures in speculative compilation +// of the kernels. +// +// REQUIRES: aspect-fp64 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DENABLE_FP64 %s -o %t1.out +// RUN: %HOST_RUN_PLACEHOLDER %t1.out +// RUN: %CPU_RUN_PLACEHOLDER %t1.out +// RUN: %GPU_RUN_PLACEHOLDER %t1.out +// RUN: %ACC_RUN_PLACEHOLDER %t1.out + +#include "fill.cpp";