From 520540fb778b8407914ad61552ac48b8c7f469c7 Mon Sep 17 00:00:00 2001 From: "aditi.kumaraswamy" Date: Wed, 14 Apr 2021 11:38:45 -0700 Subject: [PATCH 1/2] [FPGA][NFC][LSU] Updating extension test and docs for LSU change to multi_ptr --- sycl/doc/extensions/IntelFPGA/FPGALsu.md | 14 ++-- sycl/test/extensions/fpga.cpp | 93 ++++++++++++++++-------- 2 files changed, 70 insertions(+), 37 deletions(-) diff --git a/sycl/doc/extensions/IntelFPGA/FPGALsu.md b/sycl/doc/extensions/IntelFPGA/FPGALsu.md index 6bb10604a634d..c80f6d81723de 100644 --- a/sycl/doc/extensions/IntelFPGA/FPGALsu.md +++ b/sycl/doc/extensions/IntelFPGA/FPGALsu.md @@ -7,7 +7,7 @@ is included in `CL/sycl/INTEL/fpga_extensions.hpp`. The class `cl::sycl::INTEL::lsu` allows users to explicitly request that the implementation of a global memory access is configured in a certain way. The class has two member functions, `load()` and `store()` which allow loading from -and storing to a `global_ptr`, respectively, and is templated on the following +and storing to a `multi_ptr`, respectively, and is templated on the following 4 optional paremeters: 1. **`cl::sycl::INTEL::burst_coalesce`, where `B` is a boolean**: request, @@ -47,10 +47,12 @@ template class lsu final { public: lsu() = delete; - template static T load(sycl::global_ptr Ptr) { + template + static _T load(sycl::multi_ptr<_T, _space> Ptr) { + check_space<_space>(); check_load(); #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - return *__builtin_intel_fpga_mem((T *)Ptr, + return *__builtin_intel_fpga_mem((_T *)Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch, _cache_val); @@ -59,10 +61,12 @@ public: #endif } - template static void store(sycl::global_ptr Ptr, T Val) { + template + static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) { + check_space<_space>(); check_store(); #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem) - *__builtin_intel_fpga_mem((T *)Ptr, + *__builtin_intel_fpga_mem((_T *)Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch, _cache_val) = Val; diff --git a/sycl/test/extensions/fpga.cpp b/sycl/test/extensions/fpga.cpp index 023629e9530a8..86d5e1178c0d2 100644 --- a/sycl/test/extensions/fpga.cpp +++ b/sycl/test/extensions/fpga.cpp @@ -7,6 +7,30 @@ template struct ethernet_pipe_id { static constexpr unsigned id = ID; }; +template +void lsu_body(cl::sycl::multi_ptr input_ptr,cl::sycl::multi_ptr output_ptr) { + using PrefetchingLSU = + cl::sycl::INTEL::lsu, + cl::sycl::INTEL::statically_coalesce>; + + using BurstCoalescedLSU = + cl::sycl::INTEL::lsu, + cl::sycl::INTEL::statically_coalesce>; + + using CachingLSU = + cl::sycl::INTEL::lsu, + cl::sycl::INTEL::cache<1024>, + cl::sycl::INTEL::statically_coalesce>; + + using PipelinedLSU = cl::sycl::INTEL::lsu<>; + + int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0] + int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1] + + BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X + PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y +} + using ethernet_read_pipe = sycl::INTEL::kernel_readable_io_pipe, int, 0>; using ethernet_write_pipe = @@ -58,39 +82,44 @@ int main() { /*Check LSU interface*/ { - cl::sycl::buffer output_buffer(1); - auto *in_ptr = cl::sycl::malloc_host(1, Queue.get_context()); - - Queue.submit([&](cl::sycl::handler &cgh) { - auto output_accessor = - output_buffer.get_access(cgh); - cgh.single_task([=] { - cl::sycl::host_ptr input_ptr(in_ptr); - auto output_ptr = output_accessor.get_pointer(); - - using PrefetchingLSU = - cl::sycl::INTEL::lsu, - cl::sycl::INTEL::statically_coalesce>; - - using BurstCoalescedLSU = - cl::sycl::INTEL::lsu, - cl::sycl::INTEL::statically_coalesce>; - - using CachingLSU = - cl::sycl::INTEL::lsu, - cl::sycl::INTEL::cache<1024>, - cl::sycl::INTEL::statically_coalesce>; - - using PipelinedLSU = cl::sycl::INTEL::lsu<>; - - int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0] - int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1] - - BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X - PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y - }); - }); + { + auto *out_ptr = cl::sycl::malloc_host(1, Queue.get_context()); + auto *in_ptr = cl::sycl::malloc_host(1, Queue.get_context()); + Queue.submit([&](sycl::handler& cgh) { + cgh.single_task([=]() { + cl::sycl::host_ptr input_ptr(in_ptr); + cl::sycl::host_ptr output_ptr(out_ptr); + intelfpga::lsu_body(input_ptr,output_ptr); + }); + }); + } + { + auto *out_ptr = cl::sycl::malloc_device(1, Queue); + auto *in_ptr = cl::sycl::malloc_device(1, Queue); + Queue.submit([&](sycl::handler& cgh) { + cgh.single_task([=]() { + cl::sycl::device_ptr input_ptr(in_ptr); + cl::sycl::device_ptr output_ptr(out_ptr); + intelfpga::lsu_body(input_ptr,output_ptr); + }); + }); + } + { + cl::sycl::buffer output_buffer(1); + cl::sycl::buffer input_buffer(1); + Queue.submit([&](sycl::handler& cgh) { + auto output_accessor = + output_buffer.get_access(cgh); + auto input_accessor = + input_buffer.get_access(cgh); + cgh.single_task([=]() { + auto input_ptr = input_accessor.get_pointer(); + auto output_ptr = output_accessor.get_pointer(); + intelfpga::lsu_body<>(input_ptr,output_ptr); + }); + }); + } } return 0; From 288e73071fcf7fcc61b53626901a3b0253496368 Mon Sep 17 00:00:00 2001 From: "aditi.kumaraswamy" Date: Wed, 14 Apr 2021 11:53:27 -0700 Subject: [PATCH 2/2] [FPGA][NFC][LSU] Running clang-format on fpga.cpp lsu test --- sycl/test/extensions/fpga.cpp | 78 ++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/sycl/test/extensions/fpga.cpp b/sycl/test/extensions/fpga.cpp index 86d5e1178c0d2..14abb09fe3ca1 100644 --- a/sycl/test/extensions/fpga.cpp +++ b/sycl/test/extensions/fpga.cpp @@ -83,43 +83,47 @@ int main() { /*Check LSU interface*/ { - { - auto *out_ptr = cl::sycl::malloc_host(1, Queue.get_context()); - auto *in_ptr = cl::sycl::malloc_host(1, Queue.get_context()); - Queue.submit([&](sycl::handler& cgh) { - cgh.single_task([=]() { - cl::sycl::host_ptr input_ptr(in_ptr); - cl::sycl::host_ptr output_ptr(out_ptr); - intelfpga::lsu_body(input_ptr,output_ptr); - }); - }); - } - { - auto *out_ptr = cl::sycl::malloc_device(1, Queue); - auto *in_ptr = cl::sycl::malloc_device(1, Queue); - Queue.submit([&](sycl::handler& cgh) { - cgh.single_task([=]() { - cl::sycl::device_ptr input_ptr(in_ptr); - cl::sycl::device_ptr output_ptr(out_ptr); - intelfpga::lsu_body(input_ptr,output_ptr); - }); - }); - } - { - cl::sycl::buffer output_buffer(1); - cl::sycl::buffer input_buffer(1); - Queue.submit([&](sycl::handler& cgh) { - auto output_accessor = - output_buffer.get_access(cgh); - auto input_accessor = - input_buffer.get_access(cgh); - cgh.single_task([=]() { - auto input_ptr = input_accessor.get_pointer(); - auto output_ptr = output_accessor.get_pointer(); - intelfpga::lsu_body<>(input_ptr,output_ptr); - }); - }); - } + { + auto *out_ptr = cl::sycl::malloc_host(1, Queue.get_context()); + auto *in_ptr = cl::sycl::malloc_host(1, Queue.get_context()); + Queue.submit([&](sycl::handler &cgh) { + cgh.single_task([=]() { + cl::sycl::host_ptr input_ptr(in_ptr); + cl::sycl::host_ptr output_ptr(out_ptr); + intelfpga::lsu_body< + int, cl::sycl::access::address_space::global_host_space>( + input_ptr, output_ptr); + }); + }); + } + { + auto *out_ptr = cl::sycl::malloc_device(1, Queue); + auto *in_ptr = cl::sycl::malloc_device(1, Queue); + Queue.submit([&](sycl::handler &cgh) { + cgh.single_task([=]() { + cl::sycl::device_ptr input_ptr(in_ptr); + cl::sycl::device_ptr output_ptr(out_ptr); + intelfpga::lsu_body< + int, cl::sycl::access::address_space::global_device_space>( + input_ptr, output_ptr); + }); + }); + } + { + cl::sycl::buffer output_buffer(1); + cl::sycl::buffer input_buffer(1); + Queue.submit([&](sycl::handler &cgh) { + auto output_accessor = + output_buffer.get_access(cgh); + auto input_accessor = + input_buffer.get_access(cgh); + cgh.single_task([=]() { + auto input_ptr = input_accessor.get_pointer(); + auto output_ptr = output_accessor.get_pointer(); + intelfpga::lsu_body<>(input_ptr, output_ptr); + }); + }); + } } return 0;