From 76310a979f4d3984ac9fb336bf828506ab146758 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 21 Apr 2025 14:01:28 +0800 Subject: [PATCH 1/3] [SYCL] Test bfloat16 devicelib with dlopen/dlclose Signed-off-by: jinge90 --- .../bfloat16_conversion_dlopen_test.cpp | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp diff --git a/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp b/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp new file mode 100644 index 0000000000000..6fd14fabe84e0 --- /dev/null +++ b/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp @@ -0,0 +1,112 @@ +//==----------- bf1oat16 devicelib dlopen test for SYCL JIT ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: linux + +// RUN: %{build} -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t.so + +// DEFINE: %{compile} = %{build} -DFNAME=%basename_t -ldl -Wl,-rpath=%T + +// RUN: %{compile} -o %t1.out +// RUN: %{run} %t1.out + +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: bfloat16 device library is not used on AMD and Nvidia. + +#include +#include + +#include +#include + +using namespace sycl; + +constexpr access::mode sycl_read = access::mode::read; +constexpr access::mode sycl_write = access::mode::write; + +using BFP = sycl::ext::oneapi::bfloat16; + +#ifdef BUILD_LIB +class FOO_KERN; +void foo() { + sycl::queue deviceQueue; + BFP bf16_v; + float fp32_v = 16.5f; + { + buffer fp32_buffer{&fp32_v, 1}; + buffer bf16_buffer{&bf16_v, 1}; + deviceQueue + .submit([&](handler &cgh) { + auto fp32_acc = fp32_buffer.get_access(cgh); + auto bf16_acc = bf16_buffer.get_access(cgh); + cgh.single_task([=]() { bf16_acc[0] = BFP{fp32_acc[0]}; }); + }) + .wait(); + } + std::cout << "In foo: " << bf16_v << std::endl; +} +#else + +class MAINRUN; +void main_run(sycl::queue &deviceQueue) { + BFP bf16_v; + float fp32_v = 16.5f; + { + buffer fp32_buffer{&fp32_v, 1}; + buffer bf16_buffer{&bf16_v, 1}; + deviceQueue + .submit([&](handler &cgh) { + auto fp32_acc = fp32_buffer.get_access(cgh); + auto bf16_acc = bf16_buffer.get_access(cgh); + cgh.single_task( + [=]() { bf16_acc[0] = BFP{fp32_acc[0] + 0.5f}; }); + }) + .wait(); + } + std::cout << "In run: " << bf16_v << std::endl; +} + +#define STRINGIFY_HELPER(A) #A +#define STRINGIFY(A) STRINGIFY_HELPER(A) +#define SO_FNAME "lib" STRINGIFY(FNAME) ".so" + +int main() { + BFP bf16_array[3]; + float fp32_array[3] = {7.0f, 8.5f, 0.5f}; + + sycl::queue deviceQueue; + + main_run(deviceQueue); + + void *handle = dlopen(SO_FNAME, RTLD_LAZY); + void (*func)(); + *(void **)(&func) = dlsym(handle, "_Z3foov"); + func(); + dlclose(handle); + + { + buffer fp32_buffer{fp32_array, 3}; + buffer bf16_buffer{bf16_array, 3}; + deviceQueue + .submit([&](handler &cgh) { + auto fp32_acc = fp32_buffer.get_access(cgh); + auto bf16_acc = bf16_buffer.get_access(cgh); + cgh.single_task([=]() { + bf16_acc[0] = BFP{fp32_acc[0]}; + bf16_acc[1] = BFP{fp32_acc[1]}; + bf16_acc[2] = BFP{fp32_acc[2]}; + }); + }) + .wait(); + } + std::cout << "In main: " << bf16_array[0] << " " << bf16_array[1] << " " + << bf16_array[2] << std::endl; + + return 0; +} +#endif From d4754f2701b446e00398ee19fb8c6cd7d3800e1f Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 23 Apr 2025 14:22:20 +0800 Subject: [PATCH 2/3] add get_kernel_ids check for dlopen/dlclose Signed-off-by: jinge90 --- .../bfloat16_conversion_dlopen_test.cpp | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp b/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp index 6fd14fabe84e0..f6c17756388e9 100644 --- a/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp +++ b/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -78,17 +79,35 @@ void main_run(sycl::queue &deviceQueue) { int main() { BFP bf16_array[3]; float fp32_array[3] = {7.0f, 8.5f, 0.5f}; - sycl::queue deviceQueue; - + std::vector all_kernel_ids; + bool dynlib_kernel_available = false; + bool dynlib_kernel_unavailable = true; main_run(deviceQueue); void *handle = dlopen(SO_FNAME, RTLD_LAZY); void (*func)(); *(void **)(&func) = dlsym(handle, "_Z3foov"); func(); + all_kernel_ids = sycl::get_kernel_ids(); + for (auto k : all_kernel_ids) { + if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN")) + dynlib_kernel_available = true; + } + + // Before dlclose, the FOO_KERN from sycl dynamic library must exist. + assert(dynlib_kernel_available); + dlclose(handle); + all_kernel_ids = sycl::get_kernel_ids(); + for (auto k : all_kernel_ids) { + if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN")) + dynlib_kernel_unavailable = false; + } + + assert(dynlib_kernel_unavailable); + { buffer fp32_buffer{fp32_array, 3}; buffer bf16_buffer{bf16_array, 3}; From 0e442dae8fdc24aceef46f7f5af66271b87309f1 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 23 Apr 2025 19:24:34 +0800 Subject: [PATCH 3/3] address review comments Signed-off-by: jinge90 --- .../bfloat16_conversion_dlopen_test.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp b/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp index f6c17756388e9..2b5c1a8d1bb74 100644 --- a/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp +++ b/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp @@ -6,13 +6,19 @@ // //===----------------------------------------------------------------------===// +// The case uses dlopen/close to load/unload a sycl shared library which +// depends bfloat16 device library and the main function also includes sycl +// kernels which depend on bfloat16 device library. SYCL program manager will +// own the bfloat16 device library image which is shared by all kernels using +// bfloat16 features, so the program should also work well when the shared +// library is dlclosed and the device images are removed. + // REQUIRES: linux // RUN: %{build} -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t.so -// DEFINE: %{compile} = %{build} -DFNAME=%basename_t -ldl -Wl,-rpath=%T +// RUN: %{build} -DFNAME=%basename_t -ldl -Wl,-rpath=%T -o %t1.out -// RUN: %{compile} -o %t1.out // RUN: %{run} %t1.out // UNSUPPORTED: target-nvidia || target-amd @@ -35,7 +41,7 @@ using BFP = sycl::ext::oneapi::bfloat16; #ifdef BUILD_LIB class FOO_KERN; void foo() { - sycl::queue deviceQueue; + queue deviceQueue; BFP bf16_v; float fp32_v = 16.5f; { @@ -54,7 +60,7 @@ void foo() { #else class MAINRUN; -void main_run(sycl::queue &deviceQueue) { +void main_run(queue &deviceQueue) { BFP bf16_v; float fp32_v = 16.5f; { @@ -79,7 +85,7 @@ void main_run(sycl::queue &deviceQueue) { int main() { BFP bf16_array[3]; float fp32_array[3] = {7.0f, 8.5f, 0.5f}; - sycl::queue deviceQueue; + queue deviceQueue; std::vector all_kernel_ids; bool dynlib_kernel_available = false; bool dynlib_kernel_unavailable = true;