Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions sycl/doc/extensions/IntelFPGA/FPGALsu.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ is included in `CL/sycl/INTEL/fpga_extensions.hpp`.
The class `cl::sycl::INTEL::lsu` allows users to explicitly request that the
implementation of a global memory access is configured in a certain way. The
class has two member functions, `load()` and `store()` which allow loading from
and storing to a `global_ptr`, respectively, and is templated on the following
and storing to a `multi_ptr`, respectively, and is templated on the following
4 optional paremeters:

1. **`cl::sycl::INTEL::burst_coalesce<B>`, where `B` is a boolean**: request,
Expand Down Expand Up @@ -47,10 +47,12 @@ template <class... mem_access_params> class lsu final {
public:
lsu() = delete;

template <typename T> static T load(sycl::global_ptr<T> Ptr) {
template <typename _T, access::address_space _space>
static _T load(sycl::multi_ptr<_T, _space> Ptr) {
check_space<_space>();
check_load();
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
return *__builtin_intel_fpga_mem((T *)Ptr,
return *__builtin_intel_fpga_mem((_T *)Ptr,
_burst_coalesce | _cache |
_dont_statically_coalesce | _prefetch,
_cache_val);
Expand All @@ -59,10 +61,12 @@ public:
#endif
}

template <typename T> static void store(sycl::global_ptr<T> Ptr, T Val) {
template <typename _T, access::address_space _space>
static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) {
check_space<_space>();
check_store();
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
*__builtin_intel_fpga_mem((T *)Ptr,
*__builtin_intel_fpga_mem((_T *)Ptr,
_burst_coalesce | _cache |
_dont_statically_coalesce | _prefetch,
_cache_val) = Val;
Expand Down
95 changes: 64 additions & 31 deletions sycl/test/extensions/fpga.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,30 @@ template <unsigned ID> struct ethernet_pipe_id {
static constexpr unsigned id = ID;
};

template <typename T, cl::sycl::access::address_space space>
void lsu_body(cl::sycl::multi_ptr<T,space> input_ptr,cl::sycl::multi_ptr<T,space> output_ptr) {
using PrefetchingLSU =
cl::sycl::INTEL::lsu<cl::sycl::INTEL::prefetch<true>,
cl::sycl::INTEL::statically_coalesce<false>>;

using BurstCoalescedLSU =
cl::sycl::INTEL::lsu<cl::sycl::INTEL::burst_coalesce<true>,
cl::sycl::INTEL::statically_coalesce<false>>;

using CachingLSU =
cl::sycl::INTEL::lsu<cl::sycl::INTEL::burst_coalesce<true>,
cl::sycl::INTEL::cache<1024>,
cl::sycl::INTEL::statically_coalesce<false>>;

using PipelinedLSU = cl::sycl::INTEL::lsu<>;

int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]

BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y
}

using ethernet_read_pipe =
sycl::INTEL::kernel_readable_io_pipe<ethernet_pipe_id<0>, int, 0>;
using ethernet_write_pipe =
Expand Down Expand Up @@ -58,39 +82,48 @@ int main() {

/*Check LSU interface*/
{
cl::sycl::buffer<int, 1> output_buffer(1);
auto *in_ptr = cl::sycl::malloc_host<int>(1, Queue.get_context());

Queue.submit([&](cl::sycl::handler &cgh) {
auto output_accessor =
output_buffer.get_access<cl::sycl::access::mode::write>(cgh);

cgh.single_task<class kernel>([=] {
cl::sycl::host_ptr<int> input_ptr(in_ptr);
auto output_ptr = output_accessor.get_pointer();

using PrefetchingLSU =
cl::sycl::INTEL::lsu<cl::sycl::INTEL::prefetch<true>,
cl::sycl::INTEL::statically_coalesce<false>>;

using BurstCoalescedLSU =
cl::sycl::INTEL::lsu<cl::sycl::INTEL::burst_coalesce<true>,
cl::sycl::INTEL::statically_coalesce<false>>;

using CachingLSU =
cl::sycl::INTEL::lsu<cl::sycl::INTEL::burst_coalesce<true>,
cl::sycl::INTEL::cache<1024>,
cl::sycl::INTEL::statically_coalesce<false>>;

using PipelinedLSU = cl::sycl::INTEL::lsu<>;

int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]

BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y
{
auto *out_ptr = cl::sycl::malloc_host<int>(1, Queue.get_context());
auto *in_ptr = cl::sycl::malloc_host<int>(1, Queue.get_context());
Queue.submit([&](sycl::handler &cgh) {
cgh.single_task<class HostAnnotation>([=]() {
cl::sycl::host_ptr<int> input_ptr(in_ptr);
cl::sycl::host_ptr<int> output_ptr(out_ptr);
intelfpga::lsu_body<
int, cl::sycl::access::address_space::global_host_space>(
input_ptr, output_ptr);
});
});
});
}
{
auto *out_ptr = cl::sycl::malloc_device<int>(1, Queue);
auto *in_ptr = cl::sycl::malloc_device<int>(1, Queue);
Queue.submit([&](sycl::handler &cgh) {
cgh.single_task<class DeviceAnnotation>([=]() {
cl::sycl::device_ptr<int> input_ptr(in_ptr);
cl::sycl::device_ptr<int> output_ptr(out_ptr);
intelfpga::lsu_body<
int, cl::sycl::access::address_space::global_device_space>(
input_ptr, output_ptr);
});
});
}
{
cl::sycl::buffer<int, 1> output_buffer(1);
cl::sycl::buffer<int, 1> input_buffer(1);
Queue.submit([&](sycl::handler &cgh) {
auto output_accessor =
output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
auto input_accessor =
input_buffer.get_access<cl::sycl::access::mode::read>(cgh);
cgh.single_task<class AccessorAnnotation>([=]() {
auto input_ptr = input_accessor.get_pointer();
auto output_ptr = output_accessor.get_pointer();
intelfpga::lsu_body<>(input_ptr, output_ptr);
});
});
}
}

return 0;
Expand Down