From 3c3ddc5cee5ec3e62fa45c4429861644cbd03188 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 4 Jan 2024 06:37:44 -0500 Subject: [PATCH 1/4] Made tests portably pass on cuda/hip backends. Signed-off-by: JackAKirk --- sycl/test-e2e/USM/P2P/p2p_access.cpp | 16 +++------------ sycl/test-e2e/USM/P2P/p2p_atomics.cpp | 28 +++++++++------------------ sycl/test-e2e/USM/P2P/p2p_copy.cpp | 16 +++------------ 3 files changed, 15 insertions(+), 45 deletions(-) diff --git a/sycl/test-e2e/USM/P2P/p2p_access.cpp b/sycl/test-e2e/USM/P2P/p2p_access.cpp index cf8121db1a930..859b469f73a5f 100644 --- a/sycl/test-e2e/USM/P2P/p2p_access.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_access.cpp @@ -1,6 +1,5 @@ -// REQUIRES: cuda -// RUN: %{build} -o %t.out -// RUN: %if cuda %{ %{run} %t.out %} +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out #include #include @@ -9,17 +8,8 @@ using namespace sycl; int main() { - // Note that this code will largely be removed: it is temporary due to the - // temporary lack of multiple devices per sycl context in the Nvidia backend. - // A portable implementation, using a single gpu platform, should be possible - // once the Nvidia context issues are resolved. - //////////////////////////////////////////////////////////////////////// - std::vector Devs; - for (const auto &plt : sycl::platform::get_platforms()) { + auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu); - if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) - Devs.push_back(plt.get_devices()[0]); - } if (Devs.size() < 2) { std::cout << "Cannot test P2P capabilities, at least two devices are " "required, exiting." diff --git a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp index 9f6c254dbf915..59fd189fbd7d6 100644 --- a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp @@ -1,6 +1,5 @@ -// REQUIRES: cuda -// RUN: %if any-device-is-cuda %{ %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_61 -o %t.out %} -// RUN: %if cuda %{ %{run} %t.out %} +// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_61 %} -o %t.out +// RUN: %{run} %t.out #include #include @@ -14,17 +13,8 @@ constexpr size_t N = 512; int main() { - // Note that this code will largely be removed: it is temporary due to the - // temporary lack of multiple devices per sycl context in the Nvidia backend. - // A portable implementation, using a single gpu platform, should be possible - // once the Nvidia context issues are resolved. - //////////////////////////////////////////////////////////////////////// - std::vector Devs; - for (const auto &plt : sycl::platform::get_platforms()) { + auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu); - if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) - Devs.push_back(plt.get_devices()[0]); - } if (Devs.size() < 2) { std::cout << "Cannot test P2P capabilities, at least two devices are " "required, exiting." @@ -47,18 +37,18 @@ int main() { // Enables Devs[1] to access Devs[0] memory. Devs[1].ext_oneapi_enable_peer_access(Devs[0]); - std::vector input(N); + std::vector input(N); std::iota(input.begin(), input.end(), 0); - double h_sum = 0.; + int h_sum = 0.; for (const auto &value : input) { h_sum += value; } - double *d_sum = malloc_shared(1, Queues[0]); - double *d_in = malloc_device(N, Queues[0]); + int *d_sum = malloc_shared(1, Queues[0]); + int *d_in = malloc_device(N, Queues[0]); - Queues[0].memcpy(d_in, &input[0], N * sizeof(double)); + Queues[0].memcpy(d_in, &input[0], N * sizeof(int)); Queues[0].wait(); range global_range{N}; @@ -66,7 +56,7 @@ int main() { *d_sum = 0.; Queues[1].submit([&](handler &h) { h.parallel_for(global_range, [=](id<1> i) { - sycl::atomic_ref(*d_sum) += d_in[i]; }); diff --git a/sycl/test-e2e/USM/P2P/p2p_copy.cpp b/sycl/test-e2e/USM/P2P/p2p_copy.cpp index f88f0d21af821..4cd4bf7bb8cfd 100644 --- a/sycl/test-e2e/USM/P2P/p2p_copy.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_copy.cpp @@ -1,6 +1,5 @@ -// REQUIRES: cuda -// RUN: %{build} -o %t.out -// RUN: %if cuda %{ %{run} %t.out %} +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out #include #include @@ -14,17 +13,8 @@ constexpr int N = 100; int main() { - // Note that this code will largely be removed: it is temporary due to the - // temporary lack of multiple devices per sycl context in the Nvidia backend. - // A portable implementation, using a single gpu platform, should be possible - // once the Nvidia context issues are resolved. - //////////////////////////////////////////////////////////////////////// - std::vector Devs; - for (const auto &plt : sycl::platform::get_platforms()) { + auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu); - if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) - Devs.push_back(plt.get_devices()[0]); - } if (Devs.size() < 2) { std::cout << "Cannot test P2P capabilities, at least two devices are " "required, exiting." From 9c8433382de0f078942ac0586a10352e0c6552d0 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 8 Feb 2024 09:56:57 -0500 Subject: [PATCH 2/4] Set temp CMAKE for testing. Signed-off-by: JackAKirk --- sycl/plugins/hip/CMakeLists.txt | 2 +- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/hip/CMakeLists.txt b/sycl/plugins/hip/CMakeLists.txt index 7bb8638d9aa13..6d455e460bcfa 100644 --- a/sycl/plugins/hip/CMakeLists.txt +++ b/sycl/plugins/hip/CMakeLists.txt @@ -5,7 +5,7 @@ set(SYCL_BUILD_PI_HIP_PLATFORM "AMD" CACHE STRING "PI HIP platform, AMD or NVIDI message(STATUS "Including the PI API HIP backend for ${SYCL_BUILD_PI_HIP_PLATFORM}.") # Set default ROCm installation directory -set(SYCL_BUILD_PI_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") +set(SYCL_BUILD_PI_HIP_ROCM_DIR "/opt/rocm-5.5.1" CACHE STRING "ROCm installation dir") # Set HIP include and lib dirs set(SYCL_BUILD_PI_HIP_INCLUDE_DIR "" CACHE STRING "Override HIP include dir path (set to \"\" for default behavior)") diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index c12757976b0eb..81298f0b92e91 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -56,14 +56,14 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/JackAKirk/unified-runtime.git") # commit 749d8e51ea8e56726a9fb57949d7a3f81b47b15c # Merge: 810a5774 34831f4b # Author: Kenneth Benzie (Benie) # Date: Wed Jan 3 13:27:16 2024 +0000 # Merge pull request #1198 from al42and/aa-rocm6 # [HIP] Fix build with ROCm 6.0.0 - set(UNIFIED_RUNTIME_TAG 749d8e51ea8e56726a9fb57949d7a3f81b47b15c) + set(UNIFIED_RUNTIME_TAG 0dc0d83) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From d9f675a47996ae1dfca37f3e2bf3fd939318e778 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 7 Jun 2024 08:04:09 -0700 Subject: [PATCH 3/4] Revert "Set temp CMAKE for testing." This reverts commit 9c8433382de0f078942ac0586a10352e0c6552d0. --- sycl/plugins/hip/CMakeLists.txt | 2 +- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/hip/CMakeLists.txt b/sycl/plugins/hip/CMakeLists.txt index 6d455e460bcfa..7bb8638d9aa13 100644 --- a/sycl/plugins/hip/CMakeLists.txt +++ b/sycl/plugins/hip/CMakeLists.txt @@ -5,7 +5,7 @@ set(SYCL_BUILD_PI_HIP_PLATFORM "AMD" CACHE STRING "PI HIP platform, AMD or NVIDI message(STATUS "Including the PI API HIP backend for ${SYCL_BUILD_PI_HIP_PLATFORM}.") # Set default ROCm installation directory -set(SYCL_BUILD_PI_HIP_ROCM_DIR "/opt/rocm-5.5.1" CACHE STRING "ROCm installation dir") +set(SYCL_BUILD_PI_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") # Set HIP include and lib dirs set(SYCL_BUILD_PI_HIP_INCLUDE_DIR "" CACHE STRING "Override HIP include dir path (set to \"\" for default behavior)") diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 81298f0b92e91..c12757976b0eb 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -56,14 +56,14 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) - set(UNIFIED_RUNTIME_REPO "https://github.com/JackAKirk/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") # commit 749d8e51ea8e56726a9fb57949d7a3f81b47b15c # Merge: 810a5774 34831f4b # Author: Kenneth Benzie (Benie) # Date: Wed Jan 3 13:27:16 2024 +0000 # Merge pull request #1198 from al42and/aa-rocm6 # [HIP] Fix build with ROCm 6.0.0 - set(UNIFIED_RUNTIME_TAG 0dc0d83) + set(UNIFIED_RUNTIME_TAG 749d8e51ea8e56726a9fb57949d7a3f81b47b15c) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From ee00147e4b1aec79e894557be29f37be3b60b34b Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 7 Jun 2024 08:06:34 -0700 Subject: [PATCH 4/4] Mark requires cuda || hip || level_zero Signed-off-by: JackAKirk --- sycl/test-e2e/USM/P2P/p2p_access.cpp | 1 + sycl/test-e2e/USM/P2P/p2p_atomics.cpp | 1 + sycl/test-e2e/USM/P2P/p2p_copy.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/sycl/test-e2e/USM/P2P/p2p_access.cpp b/sycl/test-e2e/USM/P2P/p2p_access.cpp index 859b469f73a5f..3b282557089a3 100644 --- a/sycl/test-e2e/USM/P2P/p2p_access.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_access.cpp @@ -1,3 +1,4 @@ +// REQUIRES: cuda || hip || level_zero // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp index 59fd189fbd7d6..ac51f11169f27 100644 --- a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp @@ -1,3 +1,4 @@ +// REQUIRES: cuda || hip || level_zero // RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_61 %} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/USM/P2P/p2p_copy.cpp b/sycl/test-e2e/USM/P2P/p2p_copy.cpp index 4cd4bf7bb8cfd..05a2f6ee8794d 100644 --- a/sycl/test-e2e/USM/P2P/p2p_copy.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_copy.cpp @@ -1,3 +1,4 @@ +// REQUIRES: cuda || hip || level_zero // RUN: %{build} -o %t.out // RUN: %{run} %t.out