diff --git a/SYCL/Plugin/level_zero_sub_sub_device.cpp b/SYCL/Plugin/level_zero_sub_sub_device.cpp index 6a1b8262bf..f365483391 100644 --- a/SYCL/Plugin/level_zero_sub_sub_device.cpp +++ b/SYCL/Plugin/level_zero_sub_sub_device.cpp @@ -1,4 +1,4 @@ -// REQUIRES: gpu-intel-pvc, level_zero +// REQUIRES: gpu-intel-pvc, level_zero // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %level_zero_options %s -o %t.out // RUN: env ZE_DEBUG=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER @@ -11,37 +11,36 @@ // CHECK: [getZeQueue]: create queue ordinal = 0, index = 2 (round robin in [2, 2]) // CHECK: [getZeQueue]: create queue ordinal = 0, index = 3 (round robin in [3, 3]) -#include "CL/sycl.hpp" -#include "CL/sycl/usm.hpp" #include #include #include #include +#include #include -namespace sycl = cl::sycl; +using namespace sycl; using namespace std::chrono; #define random_float() (rand() / double(RAND_MAX)) #define INTER_NUM (150) #define KERNEL_NUM (2000) -void run(std::vector &queues) { +void run(std::vector &queues) { auto N = 1024 * 16; size_t global_range = 1024; size_t local_range = 16; - float *buffer_host0 = sycl::malloc_host(N, queues[0]); - float *buffer_device0 = sycl::malloc_device(N, queues[0]); + float *buffer_host0 = malloc_host(N, queues[0]); + float *buffer_device0 = malloc_device(N, queues[0]); - float *buffer_host1 = sycl::malloc_host(N, queues[1]); - float *buffer_device1 = sycl::malloc_device(N, queues[1]); + float *buffer_host1 = malloc_host(N, queues[1]); + float *buffer_device1 = malloc_device(N, queues[1]); - float *buffer_host2 = sycl::malloc_host(N, queues[2]); - float *buffer_device2 = sycl::malloc_device(N, queues[2]); + float *buffer_host2 = malloc_host(N, queues[2]); + float *buffer_device2 = malloc_device(N, queues[2]); - float *buffer_host3 = sycl::malloc_host(N, queues[3]); - float *buffer_device3 = sycl::malloc_device(N, queues[3]); + float *buffer_host3 = malloc_host(N, queues[3]); + float *buffer_device3 = malloc_device(N, queues[3]); for (int i = 0; i < N; ++i) { buffer_host0[i] = static_cast(random_float()); @@ -57,38 +56,34 @@ void run(std::vector &queues) { for (auto m = 0; m < INTER_NUM; ++m) { for (int k = 0; k < KERNEL_NUM; ++k) { - auto event0 = queues[0].submit([&](sycl::handler &h) { + auto event0 = queues[0].submit([&](handler &h) { h.parallel_for( - cl::sycl::nd_range<1>(cl::sycl::range<1>{global_range}, - cl::sycl::range<1>{local_range}), - [=](cl::sycl::nd_item<1> item) { + nd_range<1>(range<1>{global_range}, range<1>{local_range}), + [=](nd_item<1> item) { int i = item.get_global_linear_id(); buffer_device0[i] = buffer_device0[i] + float(2.0); }); }); - auto event1 = queues[1].submit([&](sycl::handler &h) { + auto event1 = queues[1].submit([&](handler &h) { h.parallel_for( - cl::sycl::nd_range<1>(cl::sycl::range<1>{global_range}, - cl::sycl::range<1>{local_range}), - [=](cl::sycl::nd_item<1> item) { + nd_range<1>(range<1>{global_range}, range<1>{local_range}), + [=](nd_item<1> item) { int i = item.get_global_linear_id(); buffer_device1[i] = buffer_device1[i] + float(2.0); }); }); - auto event2 = queues[2].submit([&](sycl::handler &h) { + auto event2 = queues[2].submit([&](handler &h) { h.parallel_for( - cl::sycl::nd_range<1>(cl::sycl::range<1>{global_range}, - cl::sycl::range<1>{local_range}), - [=](cl::sycl::nd_item<1> item) { + nd_range<1>(range<1>{global_range}, range<1>{local_range}), + [=](nd_item<1> item) { int i = item.get_global_linear_id(); buffer_device2[i] = buffer_device2[i] + float(2.0); }); }); - auto event3 = queues[3].submit([&](sycl::handler &h) { + auto event3 = queues[3].submit([&](handler &h) { h.parallel_for( - cl::sycl::nd_range<1>(cl::sycl::range<1>{global_range}, - cl::sycl::range<1>{local_range}), - [=](cl::sycl::nd_item<1> item) { + nd_range<1>(range<1>{global_range}, range<1>{local_range}), + [=](nd_item<1> item) { int i = item.get_global_linear_id(); buffer_device3[i] = buffer_device3[i] + float(2.0); }); @@ -119,22 +114,22 @@ int main(void) { std::cout << "[info] this case is used to submit workloads to queues on " "subsub device" << std::endl; - std::vector subsub; + std::vector subsub; - auto devices = sycl::device::get_devices(sycl::info::device_type::gpu); + auto devices = device::get_devices(info::device_type::gpu); std::cout << "[info] device count = " << devices.size() << std::endl; // watch out device here auto subdevices = devices[1] .create_sub_devices< - sycl::info::partition_property::partition_by_affinity_domain>( - sycl::info::partition_affinity_domain::next_partitionable); + info::partition_property::partition_by_affinity_domain>( + info::partition_affinity_domain::next_partitionable); std::cout << "[info] sub device size = " << subdevices.size() << std::endl; for (auto &subdev : subdevices) { auto subsubdevices = subdev.create_sub_devices< - sycl::info::partition_property::partition_by_affinity_domain>( - sycl::info::partition_affinity_domain::next_partitionable); + info::partition_property::partition_by_affinity_domain>( + info::partition_affinity_domain::next_partitionable); std::cout << "[info] sub-sub device size = " << subsubdevices.size() << std::endl; for (auto &subsubdev : subsubdevices) { @@ -147,16 +142,16 @@ int main(void) { std::cout << "[important] create 4 sycl queues on first 4 sub-sub devices" << std::endl; - sycl::queue q0(subsub[0], {cl::sycl::property::queue::enable_profiling(), - cl::sycl::property::queue::in_order()}); - sycl::queue q1(subsub[1], {cl::sycl::property::queue::enable_profiling(), - cl::sycl::property::queue::in_order()}); - sycl::queue q2(subsub[2], {cl::sycl::property::queue::enable_profiling(), - cl::sycl::property::queue::in_order()}); - sycl::queue q3(subsub[4], {cl::sycl::property::queue::enable_profiling(), - cl::sycl::property::queue::in_order()}); + queue q0(subsub[0], + {property::queue::enable_profiling(), property::queue::in_order()}); + queue q1(subsub[1], + {property::queue::enable_profiling(), property::queue::in_order()}); + queue q2(subsub[2], + {property::queue::enable_profiling(), property::queue::in_order()}); + queue q3(subsub[4], + {property::queue::enable_profiling(), property::queue::in_order()}); - std::vector queues; + std::vector queues; queues.push_back(std::move(q0)); queues.push_back(std::move(q1));