From f6bb8a34faf22d72d85bb2c4167cd4805ecba750 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Wed, 20 Jan 2021 09:45:52 -0800 Subject: [PATCH 01/34] [SYCL] Localize variables declared in inline asms. Signed-off-by: rdeodhar --- SYCL/InlineAsm/asm_if.cpp | 4 +++- SYCL/InlineAsm/asm_loop.cpp | 4 +++- SYCL/InlineAsm/asm_switch.cpp | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/SYCL/InlineAsm/asm_if.cpp b/SYCL/InlineAsm/asm_if.cpp index 6b2ba16d65..2212dd83e0 100644 --- a/SYCL/InlineAsm/asm_if.cpp +++ b/SYCL/InlineAsm/asm_if.cpp @@ -21,9 +21,11 @@ template struct KernelFunctor : WithOutputBuffer { ](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] { int Output = 0; #if defined(__SYCL_DEVICE_ONLY__) - asm volatile(".decl P1 v_type=P num_elts=1\n" + asm volatile("{\n" + ".decl P1 v_type=P num_elts=1\n" "cmp.eq (M1_NM, 8) P1 %1(0,0)<0;1,0> 0x0:b\n" "(P1) sel (M1_NM, 8) %0(0,0)<1> 0x7:d 0x8:d" + "}\n" : "=rw"(Output) : "rw"(switchField)); diff --git a/SYCL/InlineAsm/asm_loop.cpp b/SYCL/InlineAsm/asm_loop.cpp index 67c2b82f1e..b3647929eb 100644 --- a/SYCL/InlineAsm/asm_loop.cpp +++ b/SYCL/InlineAsm/asm_loop.cpp @@ -29,7 +29,8 @@ struct KernelFunctor : WithInputBuffers, WithOutputBuffer { cl::sycl::range<1>{this->getOutputBufferSize()}, [= ](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] { #if defined(__SYCL_DEVICE_ONLY__) - asm volatile(".decl P1 v_type=P num_elts=8\n" + asm volatile("{\n" + ".decl P1 v_type=P num_elts=8\n" ".decl P2 v_type=P num_elts=8\n" ".decl temp v_type=G type=d num_elts=8 align=dword\n" "mov (M1, 8) %0(0, 0)<1> 0x0:d\n" @@ -42,6 +43,7 @@ struct KernelFunctor : WithInputBuffers, WithOutputBuffer { "cmp.lt (M1, 8) P2 temp(0,0)<0;8,1> %1(0,0)<0;8,1>\n" "(P2) goto (M1, 8) label1\n" "label0:" + "}\n" : "+rw"(C[wiID]) : "rw"(A[wiID]), "rw"(B[wiID])); #else diff --git a/SYCL/InlineAsm/asm_switch.cpp b/SYCL/InlineAsm/asm_switch.cpp index 5f8a732680..8163e60948 100644 --- a/SYCL/InlineAsm/asm_switch.cpp +++ b/SYCL/InlineAsm/asm_switch.cpp @@ -21,7 +21,8 @@ template struct KernelFunctor : WithOutputBuffer { ](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] { int Output = 0; #if defined(__SYCL_DEVICE_ONLY__) - asm volatile(".decl P1 v_type=P num_elts=1\n" + asm volatile("{\n" + ".decl P1 v_type=P num_elts=1\n" ".decl P2 v_type=P num_elts=1\n" ".decl P3 v_type=P num_elts=1\n" "cmp.ne (M1_NM, 8) P1 %1(0,0)<0;1,0> 0x0:d\n" @@ -37,6 +38,7 @@ template struct KernelFunctor : WithOutputBuffer { "(P3) goto (M1, 1) label2\n" "mov (M1, 8) %0(0,0)<1> 0x7:d\n" "label2:" + "}\n" : "=rw"(Output) : "rw"(switchField)); From bb523e8bd0648c7f048385a5d735e297c13e6938 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Mon, 25 Jan 2021 15:43:49 -0800 Subject: [PATCH 02/34] [SYCL] Test for disabling range rounding. Signed-off-by: rdeodhar --- .../parallel_for_disable_range_roundup.cpp | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100755 SYCL/Basic/parallel_for_disable_range_roundup.cpp diff --git a/SYCL/Basic/parallel_for_disable_range_roundup.cpp b/SYCL/Basic/parallel_for_disable_range_roundup.cpp new file mode 100755 index 0000000000..d8d168a5f4 --- /dev/null +++ b/SYCL/Basic/parallel_for_disable_range_roundup.cpp @@ -0,0 +1,62 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %t.out %GPU_CHECK_PLACEHOLDER --check-prefix CHECK-DISABLED + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -sycl-std=2017 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %t.out %GPU_CHECK_PLACEHOLDER --check-prefix CHECK-DISABLED + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -sycl-std=2020 %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %t.out %GPU_CHECK_PLACEHOLDER --check-prefix CHECK-ENABLED + +#include + +using namespace sycl; + +range<1> Range1 = {0}; + +void check(const char *msg, size_t v, size_t ref) { + std::cout << msg << v << std::endl; + assert(v == ref); +} + +int try_rounding_off(size_t size) { + range<1> Size{size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range1, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + std::cout << "Run parallel_for" << std::endl; + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<1> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0] = ITEM.get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range1.get(0), size); + check("Counter = ", Counter, size); + return 0; +} + +int main() { + int x; + + x = 10; + try_rounding_off(x); + + return 0; +} + +// CHECK-DISABLED: Run parallel_for +// CHECK-DISABLED-NOT: parallel_for range adjusted from 10 +// CHECK-DISABLED: Size seen by user = 10 +// CHECK-DISABLED-NEXT: Counter = 10 + +// CHECK-ENABLED: Run parallel_for +// CHECK-ENABLED-NEXT: parallel_for range adjusted from 10 +// CHECK-ENABLED-NEXT: Size seen by user = 10 +// CHECK-ENABLED-NEXT: Counter = 10 From d69e8a1374bb38d605b4e86c96999ec0702afe09 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Thu, 4 Aug 2022 13:37:57 -0700 Subject: [PATCH 03/34] [SYCL] Adjustments to tests to account for moving bfloat16 support out of experimental status. Signed-off-by: Rajiv Deodhar --- SYCL/BFloat16/bfloat16_builtins.cpp | 2 +- SYCL/BFloat16/bfloat16_type.hpp | 42 +++++++++++------------ SYCL/Matrix/element_wise_all_ops_cuda.cpp | 2 +- SYCL/Matrix/element_wise_wi_marray.cpp | 2 +- SYCL/Matrix/joint_matrix_tensorcore.cpp | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_builtins.cpp b/SYCL/BFloat16/bfloat16_builtins.cpp index 9ccf988492..253bd875ef 100644 --- a/SYCL/BFloat16/bfloat16_builtins.cpp +++ b/SYCL/BFloat16/bfloat16_builtins.cpp @@ -12,7 +12,7 @@ #include using namespace sycl; -using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi; constexpr int N = 60; // divisible by all tested array sizes constexpr float bf16_eps = 0.00390625; diff --git a/SYCL/BFloat16/bfloat16_type.hpp b/SYCL/BFloat16/bfloat16_type.hpp index 17742fb5dc..fa06d81281 100644 --- a/SYCL/BFloat16/bfloat16_type.hpp +++ b/SYCL/BFloat16/bfloat16_type.hpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include @@ -21,7 +21,7 @@ void verify_conv_implicit(queue &q, buffer &a, range<1> &r, q.submit([&](handler &cgh) { auto A = a.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]}; + sycl::ext::oneapi::bfloat16 AVal{A[index]}; A[index] = AVal; }); }); @@ -35,8 +35,8 @@ void verify_conv_explicit(queue &q, buffer &a, range<1> &r, auto A = a.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { uint16_t AVal = - sycl::ext::oneapi::experimental::bfloat16::from_float(A[index]); - A[index] = sycl::ext::oneapi::experimental::bfloat16::to_float(AVal); + sycl::ext::oneapi::bfloat16::from_float(A[index]); + A[index] = sycl::ext::oneapi::bfloat16::to_float(AVal); }); }); @@ -52,9 +52,9 @@ void verify_add(queue &q, buffer &a, buffer &b, range<1> &r, auto B = b.get_access(cgh); auto C = c.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]}; - sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]}; - sycl::ext::oneapi::experimental::bfloat16 CVal = AVal + BVal; + sycl::ext::oneapi::bfloat16 AVal{A[index]}; + sycl::ext::oneapi::bfloat16 BVal{B[index]}; + sycl::ext::oneapi::bfloat16 CVal = AVal + BVal; C[index] = CVal; }); }); @@ -71,9 +71,9 @@ void verify_sub(queue &q, buffer &a, buffer &b, range<1> &r, auto B = b.get_access(cgh); auto C = c.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]}; - sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]}; - sycl::ext::oneapi::experimental::bfloat16 CVal = AVal - BVal; + sycl::ext::oneapi::bfloat16 AVal{A[index]}; + sycl::ext::oneapi::bfloat16 BVal{B[index]}; + sycl::ext::oneapi::bfloat16 CVal = AVal - BVal; C[index] = CVal; }); }); @@ -88,8 +88,8 @@ void verify_minus(queue &q, buffer &a, range<1> &r, const float ref) { auto A = a.get_access(cgh); auto C = c.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]}; - sycl::ext::oneapi::experimental::bfloat16 CVal = -AVal; + sycl::ext::oneapi::bfloat16 AVal{A[index]}; + sycl::ext::oneapi::bfloat16 CVal = -AVal; C[index] = CVal; }); }); @@ -106,9 +106,9 @@ void verify_mul(queue &q, buffer &a, buffer &b, range<1> &r, auto B = b.get_access(cgh); auto C = c.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]}; - sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]}; - sycl::ext::oneapi::experimental::bfloat16 CVal = AVal * BVal; + sycl::ext::oneapi::bfloat16 AVal{A[index]}; + sycl::ext::oneapi::bfloat16 BVal{B[index]}; + sycl::ext::oneapi::bfloat16 CVal = AVal * BVal; C[index] = CVal; }); }); @@ -125,9 +125,9 @@ void verify_div(queue &q, buffer &a, buffer &b, range<1> &r, auto B = b.get_access(cgh); auto C = c.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]}; - sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]}; - sycl::ext::oneapi::experimental::bfloat16 CVal = AVal / BVal; + sycl::ext::oneapi::bfloat16 AVal{A[index]}; + sycl::ext::oneapi::bfloat16 BVal{B[index]}; + sycl::ext::oneapi::bfloat16 CVal = AVal / BVal; C[index] = CVal; }); }); @@ -144,12 +144,12 @@ void verify_logic(queue &q, buffer &a, buffer &b, auto B = b.get_access(cgh); auto C = c.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]}; - sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]}; + sycl::ext::oneapi::bfloat16 AVal{A[index]}; + sycl::ext::oneapi::bfloat16 BVal{B[index]}; if (AVal) { if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal || !BVal) { - sycl::ext::oneapi::experimental::bfloat16 CVal = + sycl::ext::oneapi::bfloat16 CVal = AVal != BVal ? AVal : BVal; CVal--; CVal++; diff --git a/SYCL/Matrix/element_wise_all_ops_cuda.cpp b/SYCL/Matrix/element_wise_all_ops_cuda.cpp index c73da53888..27cf26e935 100644 --- a/SYCL/Matrix/element_wise_all_ops_cuda.cpp +++ b/SYCL/Matrix/element_wise_all_ops_cuda.cpp @@ -14,7 +14,7 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -using sycl::ext::oneapi::experimental::bfloat16; +using sycl::ext::oneapi::bfloat16; #define SG_SZ 32 constexpr size_t nWGperDim = 2; diff --git a/SYCL/Matrix/element_wise_wi_marray.cpp b/SYCL/Matrix/element_wise_wi_marray.cpp index 5b82ebf9db..99ee6d00d3 100644 --- a/SYCL/Matrix/element_wise_wi_marray.cpp +++ b/SYCL/Matrix/element_wise_wi_marray.cpp @@ -13,7 +13,7 @@ #include using namespace sycl; -using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi; using namespace sycl::ext::oneapi::experimental::matrix; #define SG_SZ 32 diff --git a/SYCL/Matrix/joint_matrix_tensorcore.cpp b/SYCL/Matrix/joint_matrix_tensorcore.cpp index 664944bacd..1500e9af7e 100644 --- a/SYCL/Matrix/joint_matrix_tensorcore.cpp +++ b/SYCL/Matrix/joint_matrix_tensorcore.cpp @@ -10,7 +10,7 @@ #include using namespace sycl; -using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi; using namespace sycl::ext::oneapi::experimental::matrix; constexpr float bf16_eps = 0.00390625; From 8da975e17fd91dc5867f0446d1abd707508aeb20 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Thu, 4 Aug 2022 13:55:45 -0700 Subject: [PATCH 04/34] Formatting change. --- SYCL/BFloat16/bfloat16_type.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_type.hpp b/SYCL/BFloat16/bfloat16_type.hpp index fa06d81281..a7827887d3 100644 --- a/SYCL/BFloat16/bfloat16_type.hpp +++ b/SYCL/BFloat16/bfloat16_type.hpp @@ -34,8 +34,7 @@ void verify_conv_explicit(queue &q, buffer &a, range<1> &r, q.submit([&](handler &cgh) { auto A = a.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - uint16_t AVal = - sycl::ext::oneapi::bfloat16::from_float(A[index]); + uint16_t AVal = sycl::ext::oneapi::bfloat16::from_float(A[index]); A[index] = sycl::ext::oneapi::bfloat16::to_float(AVal); }); }); @@ -149,8 +148,7 @@ void verify_logic(queue &q, buffer &a, buffer &b, if (AVal) { if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal || !BVal) { - sycl::ext::oneapi::bfloat16 CVal = - AVal != BVal ? AVal : BVal; + sycl::ext::oneapi::bfloat16 CVal = AVal != BVal ? AVal : BVal; CVal--; CVal++; if (AVal == BVal) { From 29452c0d053b3f2b8727bad487709dab7df59850 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Thu, 8 Sep 2022 17:36:19 -0700 Subject: [PATCH 05/34] Corrections for bfloat16 moved out of experimental. --- SYCL/BFloat16/bfloat16_type.hpp | 4 ++-- SYCL/BFloat16/bfloat_hw.cpp | 2 +- SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp | 2 +- SYCL/ESIMD/api/replicate_smoke.cpp | 2 +- SYCL/ESIMD/api/simd_copy_to_from.cpp | 2 +- SYCL/ESIMD/api/simd_subscript_operator.cpp | 2 +- SYCL/ESIMD/api/simd_view_subscript_operator.cpp | 2 +- SYCL/ESIMD/api/svm_gather_scatter.cpp | 2 +- SYCL/ESIMD/api/unary_ops_heavy.cpp | 2 +- SYCL/ESIMD/esimd_test_utils.hpp | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_type.hpp b/SYCL/BFloat16/bfloat16_type.hpp index a7827887d3..c779a26b30 100644 --- a/SYCL/BFloat16/bfloat16_type.hpp +++ b/SYCL/BFloat16/bfloat16_type.hpp @@ -34,8 +34,8 @@ void verify_conv_explicit(queue &q, buffer &a, range<1> &r, q.submit([&](handler &cgh) { auto A = a.get_access(cgh); cgh.parallel_for(r, [=](id<1> index) { - uint16_t AVal = sycl::ext::oneapi::bfloat16::from_float(A[index]); - A[index] = sycl::ext::oneapi::bfloat16::to_float(AVal); + sycl::ext::oneapi::bfloat16 AVal = A[index]; + A[index] = float(AVal); }); }); diff --git a/SYCL/BFloat16/bfloat_hw.cpp b/SYCL/BFloat16/bfloat_hw.cpp index 29d63c7fa9..bb1a2a1e1f 100644 --- a/SYCL/BFloat16/bfloat_hw.cpp +++ b/SYCL/BFloat16/bfloat_hw.cpp @@ -16,7 +16,7 @@ using get_uint_type_of_size = typename std::conditional_t< std::conditional_t>>>; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; using Bfloat16StorageT = get_uint_type_of_size; bool test(float Val, Bfloat16StorageT Bits) { diff --git a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp index da908391f8..c34c98becc 100644 --- a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp +++ b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp @@ -30,7 +30,7 @@ using namespace sycl; using namespace sycl::ext::intel::esimd; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; template class TestID; diff --git a/SYCL/ESIMD/api/replicate_smoke.cpp b/SYCL/ESIMD/api/replicate_smoke.cpp index c9985edc93..abc6121211 100644 --- a/SYCL/ESIMD/api/replicate_smoke.cpp +++ b/SYCL/ESIMD/api/replicate_smoke.cpp @@ -20,7 +20,7 @@ using namespace sycl; using namespace sycl::ext::intel::esimd; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; template struct char_to_int { using type = typename std::conditional< diff --git a/SYCL/ESIMD/api/simd_copy_to_from.cpp b/SYCL/ESIMD/api/simd_copy_to_from.cpp index 2113772cd8..ee25baaefa 100644 --- a/SYCL/ESIMD/api/simd_copy_to_from.cpp +++ b/SYCL/ESIMD/api/simd_copy_to_from.cpp @@ -38,7 +38,7 @@ using namespace sycl; using namespace sycl::ext::intel; using namespace sycl::ext::intel::esimd; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; template bool testUSM(queue &Q, T *Src, T *Dst, unsigned Off, Flags) { diff --git a/SYCL/ESIMD/api/simd_subscript_operator.cpp b/SYCL/ESIMD/api/simd_subscript_operator.cpp index 481fab3b04..19e16734ba 100644 --- a/SYCL/ESIMD/api/simd_subscript_operator.cpp +++ b/SYCL/ESIMD/api/simd_subscript_operator.cpp @@ -24,7 +24,7 @@ using namespace sycl; using namespace sycl::ext::intel::esimd; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; template bool test(queue &q) { std::cout << "Testing " << typeid(T).name() << "...\n"; diff --git a/SYCL/ESIMD/api/simd_view_subscript_operator.cpp b/SYCL/ESIMD/api/simd_view_subscript_operator.cpp index b9923fa25b..85ea650e1c 100644 --- a/SYCL/ESIMD/api/simd_view_subscript_operator.cpp +++ b/SYCL/ESIMD/api/simd_view_subscript_operator.cpp @@ -25,7 +25,7 @@ using namespace sycl; using namespace sycl::ext::intel::esimd; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; template class TestID; diff --git a/SYCL/ESIMD/api/svm_gather_scatter.cpp b/SYCL/ESIMD/api/svm_gather_scatter.cpp index 87f37fe094..6cedc76792 100644 --- a/SYCL/ESIMD/api/svm_gather_scatter.cpp +++ b/SYCL/ESIMD/api/svm_gather_scatter.cpp @@ -25,7 +25,7 @@ using namespace sycl; using namespace sycl::ext::intel; using namespace sycl::ext::intel::esimd; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; template bool test(queue &Q) { std::cout << " Running " << typeid(T).name() << " test, N=" << N << "...\n"; diff --git a/SYCL/ESIMD/api/unary_ops_heavy.cpp b/SYCL/ESIMD/api/unary_ops_heavy.cpp index bc86616e61..f7a774e0b6 100644 --- a/SYCL/ESIMD/api/unary_ops_heavy.cpp +++ b/SYCL/ESIMD/api/unary_ops_heavy.cpp @@ -30,7 +30,7 @@ using namespace sycl; using namespace sycl::ext::intel::esimd; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; template class TestID; diff --git a/SYCL/ESIMD/esimd_test_utils.hpp b/SYCL/ESIMD/esimd_test_utils.hpp index 453cb83b9a..b78debfee2 100644 --- a/SYCL/ESIMD/esimd_test_utils.hpp +++ b/SYCL/ESIMD/esimd_test_utils.hpp @@ -557,7 +557,7 @@ TID(uint32_t) TID(int64_t) TID(uint64_t) TID(half) -TID(sycl::ext::oneapi::experimental::bfloat16) +TID(sycl::ext::oneapi::bfloat16) TID(float) TID(double) From 63b05d164e78bcf8b62017f7783482686c1f4b54 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Wed, 14 Sep 2022 16:20:22 -0700 Subject: [PATCH 06/34] Correct test to run on multiple root devices. --- SYCL/Basic/buffer/buffer_migrate.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/SYCL/Basic/buffer/buffer_migrate.cpp b/SYCL/Basic/buffer/buffer_migrate.cpp index 5016c802bd..66ea2caf09 100644 --- a/SYCL/Basic/buffer/buffer_migrate.cpp +++ b/SYCL/Basic/buffer/buffer_migrate.cpp @@ -36,6 +36,9 @@ int main() { range<1>(1), [=](id<1> ID) { Accessor[ID] |= (1 << Index); }); }); Q.wait(); + // Access buffer on host to ensure updated value is read from and sent back + // to device + Buffer.get_host_access(); ++Index; } From cb629ff50650c97d657c213ca8a8fbc73ca876bc Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Thu, 15 Sep 2022 18:10:46 -0700 Subject: [PATCH 07/34] Removed test correction since test is now disabled. --- SYCL/Basic/buffer/buffer_migrate.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/SYCL/Basic/buffer/buffer_migrate.cpp b/SYCL/Basic/buffer/buffer_migrate.cpp index 65231c6b52..715106a97b 100644 --- a/SYCL/Basic/buffer/buffer_migrate.cpp +++ b/SYCL/Basic/buffer/buffer_migrate.cpp @@ -38,9 +38,6 @@ int main() { range<1>(1), [=](id<1> ID) { Accessor[ID] |= (1 << Index); }); }); Q.wait(); - // Access buffer on host to ensure updated value is read from and sent back - // to device - Buffer.get_host_access(); ++Index; } From df36f2dc2ad2724d2f89f96e9ec328ee90da9a02 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 20 Sep 2022 16:23:16 -0700 Subject: [PATCH 08/34] Modifications and additions to bfloat16 tests. --- SYCL/BFloat16/bfloat16_conversions.cpp | 50 ++++++++++++++++++++++++++ SYCL/BFloat16/bfloat16_type.cpp | 11 +++--- SYCL/BFloat16/bfloat16_type.hpp | 15 +++++++- 3 files changed, 69 insertions(+), 7 deletions(-) create mode 100755 SYCL/BFloat16/bfloat16_conversions.cpp diff --git a/SYCL/BFloat16/bfloat16_conversions.cpp b/SYCL/BFloat16/bfloat16_conversions.cpp new file mode 100755 index 0000000000..7ec68692c7 --- /dev/null +++ b/SYCL/BFloat16/bfloat16_conversions.cpp @@ -0,0 +1,50 @@ +// UNSUPPORTED: hip +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// Currently the feature is supported only on CPU and GPU, natively or by +// software emulation. +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUNx: %ACC_RUN_PLACEHOLDER %t.out + +//==----------- bfloat16_conversions.cpp - SYCL bfloat16 type test ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +using namespace sycl; + +template T calculate(T a, T b) { + sycl::ext::oneapi::bfloat16 x = a; + sycl::ext::oneapi::bfloat16 y = b; + sycl::ext::oneapi::bfloat16 z = x + y; + T result = z; + return result; +} + +template int test() { + T a{5.6f}; + T b{-1.1f}; + T result = calculate(a, b); + if (result == 4.5f) + return 0; + return 1; +} + +int main() { + int result; + result = test(); + result |= test(); + if (result) + std::cout << "FAIL\n"; + else + std::cout << "PASS\n"; + + return result; +} + diff --git a/SYCL/BFloat16/bfloat16_type.cpp b/SYCL/BFloat16/bfloat16_type.cpp index 25bb8ac15c..a741b78573 100644 --- a/SYCL/BFloat16/bfloat16_type.cpp +++ b/SYCL/BFloat16/bfloat16_type.cpp @@ -1,10 +1,9 @@ -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// TODO currently the feature isn't supported on most of the devices -// need to enable the test when the aspect and device_if feature are -// introduced -// RUNx: %CPU_RUN_PLACEHOLDER %t.out -// RUNx: %GPU_RUN_PLACEHOLDER %t.out +// Currently the feature is supported only on CPU and GPU, natively or by +// software emulation. +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out // RUNx: %ACC_RUN_PLACEHOLDER %t.out //==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==// diff --git a/SYCL/BFloat16/bfloat16_type.hpp b/SYCL/BFloat16/bfloat16_type.hpp index c779a26b30..9e1f39dbea 100644 --- a/SYCL/BFloat16/bfloat16_type.hpp +++ b/SYCL/BFloat16/bfloat16_type.hpp @@ -167,7 +167,7 @@ void verify_logic(queue &q, buffer &a, buffer &b, } int run_tests() { - device dev{default_selector()}; + device dev{default_selector_v}; // TODO: replace is_gpu check with extension check when the appropriate part // of implementation ready (aspect) @@ -189,18 +189,31 @@ int run_tests() { queue q{dev}; verify_conv_implicit(q, a, r, 5.0); + std::cout << "PASS verify_conv_implicit\n"; verify_conv_explicit(q, a, r, 5.0); + std::cout << "PASS verify_conv_explicit\n"; verify_add(q, a, b, r, 7.0); + std::cout << "PASS verify_add\n"; verify_sub(q, a, b, r, 3.0); + std::cout << "PASS verify_sub\n"; verify_mul(q, a, b, r, 10.0); + std::cout << "PASS verify_mul\n"; verify_div(q, a, b, r, 2.5); + std::cout << "PASS verify_div\n"; verify_logic(q, a, b, r, 7.0); + std::cout << "PASS verify_logic\n"; verify_add(q, a, b_neg, r, 3.0); + std::cout << "PASS verify_add\n"; verify_sub(q, a, b_neg, r, 7.0); + std::cout << "PASS verify_sub\n"; verify_minus(q, a, r, -5.0); + std::cout << "PASS verify_minus\n"; verify_mul(q, a, b_neg, r, -10.0); + std::cout << "PASS verify_mul\n"; verify_div(q, a, b_neg, r, -2.5); + std::cout << "PASS verify_div\n"; verify_logic(q, a, b_neg, r, 3.0); + std::cout << "PASS verify_logic\n"; return 0; } From 5bf23846e0d98d573844df8a389c97976cc80e7b Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 20 Sep 2022 16:42:35 -0700 Subject: [PATCH 09/34] Formatting change. --- SYCL/BFloat16/bfloat16_conversions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_conversions.cpp b/SYCL/BFloat16/bfloat16_conversions.cpp index 7ec68692c7..c220179565 100755 --- a/SYCL/BFloat16/bfloat16_conversions.cpp +++ b/SYCL/BFloat16/bfloat16_conversions.cpp @@ -6,13 +6,13 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUNx: %ACC_RUN_PLACEHOLDER %t.out -//==----------- bfloat16_conversions.cpp - SYCL bfloat16 type test ----------==// +//==---------- bfloat16_conversions.cpp - SYCL bfloat16 type test ---------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// #include #include From e9683a15a8597cb7733dc2005b9a8f7547ad4136 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 20 Sep 2022 16:52:58 -0700 Subject: [PATCH 10/34] Formatting change. --- SYCL/BFloat16/bfloat16_conversions.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/SYCL/BFloat16/bfloat16_conversions.cpp b/SYCL/BFloat16/bfloat16_conversions.cpp index c220179565..fff438c3ac 100755 --- a/SYCL/BFloat16/bfloat16_conversions.cpp +++ b/SYCL/BFloat16/bfloat16_conversions.cpp @@ -47,4 +47,3 @@ int main() { return result; } - From c066b3a5499113e1586ea46599bad8c43a701435 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Fri, 23 Sep 2022 09:14:27 -0700 Subject: [PATCH 11/34] Enable bfloat16 test on GPU only. --- SYCL/BFloat16/bfloat16_type.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_type.cpp b/SYCL/BFloat16/bfloat16_type.cpp index a741b78573..c146710dbd 100644 --- a/SYCL/BFloat16/bfloat16_type.cpp +++ b/SYCL/BFloat16/bfloat16_type.cpp @@ -1,8 +1,8 @@ // UNSUPPORTED: hip // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// Currently the feature is supported only on CPU and GPU, natively or by -// software emulation. -// RUN: %CPU_RUN_PLACEHOLDER %t.out +// Currently the feature is supported only on GPU, natively or by software +// emulation. +// RUNx: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUNx: %ACC_RUN_PLACEHOLDER %t.out From b0c0848be3e004b08a71b9df077be2575c161f0f Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 27 Sep 2022 13:23:42 -0700 Subject: [PATCH 12/34] Reenable bfloat16 test on CPU. --- SYCL/BFloat16/bfloat16_type.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_type.cpp b/SYCL/BFloat16/bfloat16_type.cpp index c146710dbd..5322709117 100644 --- a/SYCL/BFloat16/bfloat16_type.cpp +++ b/SYCL/BFloat16/bfloat16_type.cpp @@ -1,8 +1,7 @@ // UNSUPPORTED: hip // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// Currently the feature is supported only on GPU, natively or by software -// emulation. -// RUNx: %CPU_RUN_PLACEHOLDER %t.out +// TODO currently the feature isn't supported on FPGA. +// RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUNx: %ACC_RUN_PLACEHOLDER %t.out From 49a522a33b7d0382e033c87370452ac65fa5620b Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 3 Oct 2022 18:02:11 -0700 Subject: [PATCH 13/34] Corrected test to run on host and device. --- SYCL/BFloat16/bfloat16_conversions.cpp | 30 +++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_conversions.cpp b/SYCL/BFloat16/bfloat16_conversions.cpp index fff438c3ac..add8eae6b1 100755 --- a/SYCL/BFloat16/bfloat16_conversions.cpp +++ b/SYCL/BFloat16/bfloat16_conversions.cpp @@ -20,17 +20,35 @@ using namespace sycl; template T calculate(T a, T b) { - sycl::ext::oneapi::bfloat16 x = a; + sycl::ext::oneapi::bfloat16 x = -a; sycl::ext::oneapi::bfloat16 y = b; sycl::ext::oneapi::bfloat16 z = x + y; T result = z; return result; } -template int test() { - T a{5.6f}; +template int test_device() { + T data[3] = {-7.0f, 8.1f, 0.0f}; + + queue deviceQueue; + buffer buf{data, 3}; + deviceQueue.submit([&](handler &cgh) { + accessor numbers{buf, cgh, read_write}; + cgh.single_task([=]() { numbers[2] = calculate(numbers[0], numbers[1]); }); + }); + + host_accessor hostOutAcc{buf, read_only}; + std::cout << "Device Result = " << hostOutAcc[2] << std::endl; + if (hostOutAcc[2] == 15.125f) + return 0; + return 1; +} + +template int test_host() { + T a{-5.6f}; T b{-1.1f}; T result = calculate(a, b); + std::cout << "Host Result = " << result << std::endl; if (result == 4.5f) return 0; return 1; @@ -38,8 +56,10 @@ template int test() { int main() { int result; - result = test(); - result |= test(); + result = test_host(); + result |= test_host(); + result |= test_device(); + result |= test_device(); if (result) std::cout << "FAIL\n"; else From 7e84648fdde701f38c95e339ed487ef6f2083c29 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Tue, 4 Oct 2022 16:11:07 -0700 Subject: [PATCH 14/34] Update SYCL/BFloat16/bfloat16_type.cpp Reenable execution on a CUDA> Co-authored-by: JackAKirk --- SYCL/BFloat16/bfloat16_type.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/SYCL/BFloat16/bfloat16_type.cpp b/SYCL/BFloat16/bfloat16_type.cpp index 5322709117..d252d50d75 100644 --- a/SYCL/BFloat16/bfloat16_type.cpp +++ b/SYCL/BFloat16/bfloat16_type.cpp @@ -1,4 +1,7 @@ // UNSUPPORTED: hip +// RUN: %if cuda %{%clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 %s -o %t.out %} +// TODO enable the below when CI supports >=sm_80 +// RUNx: %if cuda %{%GPU_RUN_PLACEHOLDER %t.out %} // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // TODO currently the feature isn't supported on FPGA. // RUN: %CPU_RUN_PLACEHOLDER %t.out From 8f492d6464b4a7c45959a301fad463c1fbad2143 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 24 Oct 2022 23:17:20 -0700 Subject: [PATCH 15/34] Added test for use of fallback bfloat16 library. --- SYCL/BFloat16/bfloat16_example.cpp | 65 ++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100755 SYCL/BFloat16/bfloat16_example.cpp diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp new file mode 100755 index 0000000000..a4386dfe3b --- /dev/null +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -0,0 +1,65 @@ +/// +/// Check if bfloat16 example works using fallback libraries +/// + +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include + +using namespace sycl; +using sycl::ext::oneapi::bfloat16; + +float foo(float a, float b) { + // Convert from float to bfloat16. + bfloat16 A{a}; + bfloat16 B{b}; + + // Convert A and B from bfloat16 to float, do addition on floating-point + // numbers, then convert the result to bfloat16 and store it in C. + bfloat16 C = A + B; + + // Return the result converted from bfloat16 to float. + return C; +} + +int main(int argc, char *argv[]) { + float data[3] = {7.0, 8.1, 0.0}; + + std::cout << "CPU Result = " << foo(7.0f, 8.1f) << std::endl; + + queue deviceQueue; + buffer buf{data, 3}; + + deviceQueue.submit([&](handler &cgh) { + accessor numbers{buf, cgh, read_write}; + cgh.single_task([=]() { numbers[2] = foo(numbers[0], numbers[1]); }); + }); + + host_accessor hostOutAcc{buf, read_only}; + std::cout << "GPU Result = " << hostOutAcc[2] << std::endl; + + return 0; +} + From 4805061c10ba612135fd8e24c1804e8a34702829 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 24 Oct 2022 23:25:52 -0700 Subject: [PATCH 16/34] Formatting change --- SYCL/BFloat16/bfloat16_example.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp index a4386dfe3b..d413cf99af 100755 --- a/SYCL/BFloat16/bfloat16_example.cpp +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -62,4 +62,3 @@ int main(int argc, char *argv[]) { return 0; } - From 33c7d2b1870d006094571dcfc1bde60407b99769 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 25 Oct 2022 10:41:30 -0700 Subject: [PATCH 17/34] Enhanced the bfloat16 fallback test. --- SYCL/BFloat16/bfloat16_example.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp index d413cf99af..967ca8c10d 100755 --- a/SYCL/BFloat16/bfloat16_example.cpp +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -26,6 +26,13 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + + #include using namespace sycl; @@ -47,7 +54,12 @@ float foo(float a, float b) { int main(int argc, char *argv[]) { float data[3] = {7.0, 8.1, 0.0}; - std::cout << "CPU Result = " << foo(7.0f, 8.1f) << std::endl; + float result_host = foo(7.0f, 8.1f); + std::cout << "CPU Result = " << result_host << std::endl; + if (std::abs(15.1f - result_host) > 0.1) { + std::cout << "Test failed. Expected CPU Result ~= 15.1" << std::endl; + return 1; + } queue deviceQueue; buffer buf{data, 3}; @@ -58,7 +70,12 @@ int main(int argc, char *argv[]) { }); host_accessor hostOutAcc{buf, read_only}; - std::cout << "GPU Result = " << hostOutAcc[2] << std::endl; + float result_device = hostOutAcc[2]; + std::cout << "GPU Result = " << result_device << std::endl; + if (std::abs(result_host - result_device) > 0.1) { + std::cout << "Test failed. CPU Result !~= GPU result" << std::endl; + return 1; + } return 0; } From 21dcd205d9633b9c181badb0c9fc737b4dc4eca2 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 25 Oct 2022 11:04:30 -0700 Subject: [PATCH 18/34] Formatting change. --- SYCL/BFloat16/bfloat16_example.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp index 967ca8c10d..cc08dd8cb0 100755 --- a/SYCL/BFloat16/bfloat16_example.cpp +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -32,7 +32,6 @@ // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out - #include using namespace sycl; From cbfe491ee21f9d581c1bbefb589a15fae3e84062 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 25 Oct 2022 13:53:10 -0700 Subject: [PATCH 19/34] Adjusted bfloat16 aspect. --- SYCL/Basic/aspects.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SYCL/Basic/aspects.cpp b/SYCL/Basic/aspects.cpp index 428e6898c5..037de63324 100644 --- a/SYCL/Basic/aspects.cpp +++ b/SYCL/Basic/aspects.cpp @@ -54,8 +54,8 @@ int main() { if (plt.has(aspect::fp64)) { std::cout << " fp64" << std::endl; } - if (plt.has(aspect::ext_oneapi_bfloat16)) { - std::cout << " ext_oneapi_bfloat16" << std::endl; + if (plt.has(aspect::ext_oneapi_bfloat16_math_functions)) { + std::cout << " ext_oneapi_bfloat16_math_functions" << std::endl; } if (plt.has(aspect::int64_base_atomics)) { std::cout << " base atomic operations" << std::endl; From 3721b9719dfc786fb62d5d183fc08afab5d67c4f Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 25 Oct 2022 22:39:24 -0700 Subject: [PATCH 20/34] Changes to test to set up required environment. --- SYCL/BFloat16/bfloat16_example.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp index cc08dd8cb0..1d2718d745 100755 --- a/SYCL/BFloat16/bfloat16_example.cpp +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -2,6 +2,10 @@ /// Check if bfloat16 example works using fallback libraries /// +// REQUIRES: opencl-aot, ocloc, cpu, gpu +// UNSUPPORTED: cuda +// CUDA is not compatible with SPIR. + // RUN: %clangxx -fsycl %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out From 44c6ccb8adada5fcec2b2b0703f941b29b8199d0 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Wed, 26 Oct 2022 09:46:40 -0700 Subject: [PATCH 21/34] Adjustment to test. --- SYCL/BFloat16/bfloat16_example.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp index 1d2718d745..ac47ce7e4f 100755 --- a/SYCL/BFloat16/bfloat16_example.cpp +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -15,11 +15,9 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out From c3155bf7b4765534e77b7127cc2a3c71cbbdd992 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Wed, 26 Oct 2022 11:59:51 -0700 Subject: [PATCH 22/34] Test adjustment. --- SYCL/BFloat16/bfloat16_example.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp index ac47ce7e4f..53e95f06d3 100755 --- a/SYCL/BFloat16/bfloat16_example.cpp +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -14,19 +14,14 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out - // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out From f2edee8ea73df468179bc9c2ea9cc0378421d22c Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Wed, 26 Oct 2022 13:40:20 -0700 Subject: [PATCH 23/34] Reenabled some tests with specific GPU requirement. --- SYCL/BFloat16/bfloat16_example.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp index 53e95f06d3..7af0092877 100755 --- a/SYCL/BFloat16/bfloat16_example.cpp +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -2,7 +2,7 @@ /// Check if bfloat16 example works using fallback libraries /// -// REQUIRES: opencl-aot, ocloc, cpu, gpu +// REQUIRES: opencl-aot, ocloc, cpu, gpu-intel-gen9 // UNSUPPORTED: cuda // CUDA is not compatible with SPIR. @@ -14,14 +14,20 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out From 2c6f4cefd58d638682bf159cba634a4d01c1f3d0 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 7 Nov 2022 09:30:29 -0800 Subject: [PATCH 24/34] Changes for bfloat16 moved out of experimental. --- SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp b/SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp index 483b15b567..456b040b36 100644 --- a/SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp +++ b/SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp @@ -18,7 +18,7 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; #define SG_SZ 16 @@ -144,13 +144,13 @@ int main() { for (int j = 0; j < MATRIX_K; j++) { // bfloat16 is created using unsigned short since conversion from float to // bfloat16 is not supported on the host side yet - A[i][j] = bfloat16::from_bits(make_bf16(1.0f * (i + j))); + A[i][j] = make_bf16(1.0f * (i + j)); Aref[i][j] = make_bf16(1.0f * (i + j)); } } for (int i = 0; i < MATRIX_K / 2; i++) { for (int j = 0; j < MATRIX_N * 2; j++) { - B[i][j] = bfloat16::from_bits((make_bf16(2.0f * i + 3.0f * j))); + B[i][j] = make_bf16(2.0f * i + 3.0f * j); Bref[i][j] = make_bf16(2.0f * i + 3.0f * j); } } From 43768d840b8b4a223d69d5a9309a8d1d6c8744a3 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 7 Nov 2022 21:09:47 -0800 Subject: [PATCH 25/34] Aspect bfloat16 has been removed. --- SYCL/BFloat16/bfloat16_type_cuda.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_type_cuda.cpp b/SYCL/BFloat16/bfloat16_type_cuda.cpp index 30d1f122a2..81c4a08f12 100644 --- a/SYCL/BFloat16/bfloat16_type_cuda.cpp +++ b/SYCL/BFloat16/bfloat16_type_cuda.cpp @@ -12,13 +12,4 @@ #include "bfloat16_type.hpp" -int main() { - bool has_bfloat16_aspect = false; - for (const auto &plt : sycl::platform::get_platforms()) { - if (plt.has(aspect::ext_oneapi_bfloat16)) - has_bfloat16_aspect = true; - } - - if (has_bfloat16_aspect) - return run_tests(); -} +int main() { return run_tests(); } From 5ba6d7426436cafce7e1e19bc9b6e9c80c121028 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 7 Nov 2022 21:22:40 -0800 Subject: [PATCH 26/34] Aspect bfloat16 has been replaced by bfloat16_math_functions. --- SYCL/BFloat16/bfloat16_builtins.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYCL/BFloat16/bfloat16_builtins.cpp b/SYCL/BFloat16/bfloat16_builtins.cpp index 253bd875ef..262550c5ed 100644 --- a/SYCL/BFloat16/bfloat16_builtins.cpp +++ b/SYCL/BFloat16/bfloat16_builtins.cpp @@ -222,7 +222,7 @@ bool check(float a, float b) { int main() { queue q; - if (q.get_device().has(aspect::ext_oneapi_bfloat16)) { + if (q.get_device().has(aspect::ext_oneapi_bfloat16_math_functions)) { std::vector a(N), b(N), c(N); int err = 0; From ccc85dffd0587560abad505d5928aab43e723dea Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 7 Nov 2022 23:49:25 -0800 Subject: [PATCH 27/34] Check aspect fp16 before using sycl::half. --- SYCL/BFloat16/bfloat16_conversions.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_conversions.cpp b/SYCL/BFloat16/bfloat16_conversions.cpp index add8eae6b1..78442f2ffc 100755 --- a/SYCL/BFloat16/bfloat16_conversions.cpp +++ b/SYCL/BFloat16/bfloat16_conversions.cpp @@ -27,12 +27,11 @@ template T calculate(T a, T b) { return result; } -template int test_device() { +template int test_device(queue Q) { T data[3] = {-7.0f, 8.1f, 0.0f}; - queue deviceQueue; buffer buf{data, 3}; - deviceQueue.submit([&](handler &cgh) { + Q.submit([&](handler &cgh) { accessor numbers{buf, cgh, read_write}; cgh.single_task([=]() { numbers[2] = calculate(numbers[0], numbers[1]); }); }); @@ -55,11 +54,13 @@ template int test_host() { } int main() { + queue Q; int result; result = test_host(); result |= test_host(); - result |= test_device(); - result |= test_device(); + if (Q.get_device().has(aspect::fp16)) + result |= test_device(Q); + result |= test_device(Q); if (result) std::cout << "FAIL\n"; else From 3b60cb1ddc667919e1ea83118c62ff6635a540d5 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 8 Nov 2022 08:47:34 -0800 Subject: [PATCH 28/34] Account for lack of fp16 support on some devices. --- SYCL/BFloat16/bfloat16_conversions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYCL/BFloat16/bfloat16_conversions.cpp b/SYCL/BFloat16/bfloat16_conversions.cpp index 78442f2ffc..ed5d7ae583 100755 --- a/SYCL/BFloat16/bfloat16_conversions.cpp +++ b/SYCL/BFloat16/bfloat16_conversions.cpp @@ -1,5 +1,5 @@ // UNSUPPORTED: hip -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out // Currently the feature is supported only on CPU and GPU, natively or by // software emulation. // RUN: %CPU_RUN_PLACEHOLDER %t.out From 1b0d6f248bf333ad34276da380bad64c33d5d86b Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 8 Nov 2022 18:10:08 -0800 Subject: [PATCH 29/34] Reduce expected precision of bfloat16 calculations. --- SYCL/BFloat16/bfloat16_type.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_type.hpp b/SYCL/BFloat16/bfloat16_type.hpp index 9e1f39dbea..6c376437f4 100644 --- a/SYCL/BFloat16/bfloat16_type.hpp +++ b/SYCL/BFloat16/bfloat16_type.hpp @@ -11,8 +11,7 @@ constexpr size_t N = 100; template void assert_close(const T &C, const float ref) { for (size_t i = 0; i < N; i++) { auto diff = C[i] - ref; - assert(std::fabs(static_cast(diff)) < - std::numeric_limits::epsilon()); + assert(std::fabs(static_cast(diff)) < 0.1); } } From 8d9f34fa97332b79e1f8b760bc1f1a2aa74a7232 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Wed, 9 Nov 2022 06:00:06 -0800 Subject: [PATCH 30/34] Adjustments for bfloat16 header. --- SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp | 2 +- SYCL/Matrix/joint_matrix_bfloat16.cpp | 2 +- SYCL/Matrix/joint_matrix_bfloat16_use.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp b/SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp index 71a7f9fa66..5222b8d025 100644 --- a/SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp +++ b/SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp @@ -14,7 +14,7 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -using sycl::ext::oneapi::experimental::bfloat16; +using sycl::ext::oneapi::bfloat16; #define SG_SZ 32 constexpr size_t nWGperDim = 2; diff --git a/SYCL/Matrix/joint_matrix_bfloat16.cpp b/SYCL/Matrix/joint_matrix_bfloat16.cpp index 4d35b5f5a5..e665617156 100644 --- a/SYCL/Matrix/joint_matrix_bfloat16.cpp +++ b/SYCL/Matrix/joint_matrix_bfloat16.cpp @@ -16,7 +16,7 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; #define SG_SZ 16 diff --git a/SYCL/Matrix/joint_matrix_bfloat16_use.cpp b/SYCL/Matrix/joint_matrix_bfloat16_use.cpp index 5c110336e3..aa6412195e 100644 --- a/SYCL/Matrix/joint_matrix_bfloat16_use.cpp +++ b/SYCL/Matrix/joint_matrix_bfloat16_use.cpp @@ -17,7 +17,7 @@ #include using namespace sycl::ext::oneapi::experimental::matrix; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; #define SG_SZ 16 From f1975c510b4af8df48ab210e2f11068ed0f50e5e Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 15 Nov 2022 10:34:38 -0800 Subject: [PATCH 31/34] Test adjustment. --- SYCL/BFloat16/bfloat16_type.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYCL/BFloat16/bfloat16_type.cpp b/SYCL/BFloat16/bfloat16_type.cpp index d252d50d75..28f1bf621b 100644 --- a/SYCL/BFloat16/bfloat16_type.cpp +++ b/SYCL/BFloat16/bfloat16_type.cpp @@ -2,7 +2,7 @@ // RUN: %if cuda %{%clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 %s -o %t.out %} // TODO enable the below when CI supports >=sm_80 // RUNx: %if cuda %{%GPU_RUN_PLACEHOLDER %t.out %} -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // TODO currently the feature isn't supported on FPGA. // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out From 80c3468b01f6fc47cafa5507788ebe0f195f6c24 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Thu, 17 Nov 2022 21:03:58 -0800 Subject: [PATCH 32/34] Replace double constants with float. --- SYCL/BFloat16/bfloat16_type.hpp | 36 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_type.hpp b/SYCL/BFloat16/bfloat16_type.hpp index 6c376437f4..570755bf67 100644 --- a/SYCL/BFloat16/bfloat16_type.hpp +++ b/SYCL/BFloat16/bfloat16_type.hpp @@ -152,8 +152,8 @@ void verify_logic(queue &q, buffer &a, buffer &b, CVal++; if (AVal == BVal) { CVal -= AVal; - CVal *= 3.0; - CVal /= 2.0; + CVal *= 3.0f; + CVal /= 2.0f; } else CVal += BVal; C[index] = CVal; @@ -176,9 +176,9 @@ int run_tests() { return 0; } - std::vector vec_a(N, 5.0); - std::vector vec_b(N, 2.0); - std::vector vec_b_neg(N, -2.0); + std::vector vec_a(N, 5.0f); + std::vector vec_b(N, 2.0f); + std::vector vec_b_neg(N, -2.0f); range<1> r(N); buffer a{vec_a.data(), r}; @@ -187,31 +187,31 @@ int run_tests() { queue q{dev}; - verify_conv_implicit(q, a, r, 5.0); + verify_conv_implicit(q, a, r, 5.0f); std::cout << "PASS verify_conv_implicit\n"; - verify_conv_explicit(q, a, r, 5.0); + verify_conv_explicit(q, a, r, 5.0f); std::cout << "PASS verify_conv_explicit\n"; - verify_add(q, a, b, r, 7.0); + verify_add(q, a, b, r, 7.0f); std::cout << "PASS verify_add\n"; - verify_sub(q, a, b, r, 3.0); + verify_sub(q, a, b, r, 3.0f); std::cout << "PASS verify_sub\n"; - verify_mul(q, a, b, r, 10.0); + verify_mul(q, a, b, r, 10.0f); std::cout << "PASS verify_mul\n"; - verify_div(q, a, b, r, 2.5); + verify_div(q, a, b, r, 2.5f); std::cout << "PASS verify_div\n"; - verify_logic(q, a, b, r, 7.0); + verify_logic(q, a, b, r, 7.0f); std::cout << "PASS verify_logic\n"; - verify_add(q, a, b_neg, r, 3.0); + verify_add(q, a, b_neg, r, 3.0f); std::cout << "PASS verify_add\n"; - verify_sub(q, a, b_neg, r, 7.0); + verify_sub(q, a, b_neg, r, 7.0f); std::cout << "PASS verify_sub\n"; - verify_minus(q, a, r, -5.0); + verify_minus(q, a, r, -5.0f); std::cout << "PASS verify_minus\n"; - verify_mul(q, a, b_neg, r, -10.0); + verify_mul(q, a, b_neg, r, -10.0f); std::cout << "PASS verify_mul\n"; - verify_div(q, a, b_neg, r, -2.5); + verify_div(q, a, b_neg, r, -2.5f); std::cout << "PASS verify_div\n"; - verify_logic(q, a, b_neg, r, 3.0); + verify_logic(q, a, b_neg, r, 3.0f); std::cout << "PASS verify_logic\n"; return 0; From db0a1bce18bb96ef9273139078621b03acc4530e Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Thu, 17 Nov 2022 21:51:30 -0800 Subject: [PATCH 33/34] Replace double constants with float. --- SYCL/BFloat16/bfloat16_example.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp index 7af0092877..fab3795679 100755 --- a/SYCL/BFloat16/bfloat16_example.cpp +++ b/SYCL/BFloat16/bfloat16_example.cpp @@ -54,11 +54,11 @@ float foo(float a, float b) { } int main(int argc, char *argv[]) { - float data[3] = {7.0, 8.1, 0.0}; + float data[3] = {7.0f, 8.1f, 0.0f}; float result_host = foo(7.0f, 8.1f); std::cout << "CPU Result = " << result_host << std::endl; - if (std::abs(15.1f - result_host) > 0.1) { + if (std::abs(15.1f - result_host) > 0.1f) { std::cout << "Test failed. Expected CPU Result ~= 15.1" << std::endl; return 1; } @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) { host_accessor hostOutAcc{buf, read_only}; float result_device = hostOutAcc[2]; std::cout << "GPU Result = " << result_device << std::endl; - if (std::abs(result_host - result_device) > 0.1) { + if (std::abs(result_host - result_device) > 0.1f) { std::cout << "Test failed. CPU Result !~= GPU result" << std::endl; return 1; } From 8511e529ab83c8a77c921eb103ecc986c9cef049 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 22 Nov 2022 00:41:52 -0800 Subject: [PATCH 34/34] Fix test to return 0 from main. --- SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp b/SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp index a3661cab56..7d0c7dcc5f 100644 --- a/SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp +++ b/SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp @@ -35,4 +35,6 @@ int main() { } catch (...) { // Ignore all exceptions } + + return 0; }