Skip to content

Commit

Permalink
rewrite tests and examples to use accessors
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Nov 9, 2021
1 parent 4450298 commit 21626fa
Show file tree
Hide file tree
Showing 50 changed files with 1,002 additions and 785 deletions.
13 changes: 5 additions & 8 deletions docs/source/basic/library.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,14 +216,14 @@ Accessors
`````````

An accessor is an interface to access the data stored by a buffer or, more generally, a view.
Accessors take care of the multidimensionality of their underlying buffers when indexed, including pitched allocations.
Accessors take care of the multidimensionality of their underlying buffers when indexed.
An accessor is created via one of the following calls:

.. code-block:: cpp
auto accessor = alpaka::experimental::access(buffer); // read/write
auto accessor = alpaka::experimental::readAccess(buffer); // read
auto accessor = alpaka::experimental::writeAccess(buffer); // write
auto accessor = alpaka::access(buffer); // read/write
auto accessor = alpaka::readAccess(buffer); // read
auto accessor = alpaka::writeAccess(buffer); // write
Accessors have many template parameters and users are advised to use the ``BufferAccessor`` convenience alias to get the type of an accessor for a buffer of a given accelerator.
Example kernel with 3D accessor:
Expand All @@ -234,10 +234,7 @@ Example kernel with 3D accessor:
template<typename Acc>
ALPAKA_FN_ACC void operator()(Acc const & acc, alpaka::experimental::BufferAccessor<Acc, float, 3, alpaka::experimental::WriteAccess> data) const {
...
for(Idx z = 0; z < data.extents[0]; ++z)
for(Idx y = 0; y < data.extents[1]; ++y)
for(Idx x = 0; x < data.extents[2]; ++x)
data(z, y, x) = 42.0f;
data(z, y, x) = 42.0f;
}
};
...
Expand Down
167 changes: 45 additions & 122 deletions example/bufferCopy/src/bufferCopy.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2019 Alexander Matthes, Benjamin Worpitz, Erik Zenker, Matthias Werner
/* Copyright 2019-2021 Alexander Matthes, Benjamin Worpitz, Erik Zenker, Matthias Werner, Bernhard Manfred Gruber
*
* This file exemplifies usage of alpaka.
*
Expand Down Expand Up @@ -32,72 +32,58 @@ ALPAKA_FN_ACC size_t linIdxToPitchedIdx(size_t const globalIdx, size_t const pit
//! Prints all elements of the buffer.
struct PrintBufferKernel
{
template<typename TAcc, typename TData, typename TExtent>
template<typename TAcc, typename TData>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
TData const* const buffer,
TExtent const& extents,
size_t const pitch) const -> void
alpaka::experimental::BufferAccessor<TAcc, TData, 3, alpaka::experimental::ReadAccess> const data) const
-> void
{
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const globalThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
auto const idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridSize = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

auto const linearizedGlobalThreadIdx = alpaka::mapIdx<1u>(globalThreadIdx, globalThreadExtent);

for(size_t i(linearizedGlobalThreadIdx[0]); i < extents.prod(); i += globalThreadExtent.prod())
{
// NOTE: hard-coded for unsigned int
printf("%u:%u ", static_cast<uint32_t>(i), static_cast<uint32_t>(buffer[linIdxToPitchedIdx<2>(i, pitch)]));
}
for(size_t z = idx[0]; z < data.extents[0]; z += gridSize[0])
for(size_t y = idx[1]; y < data.extents[1]; y += gridSize[1])
for(size_t x = idx[2]; x < data.extents[2]; x += gridSize[2])
printf("%zu,%zu,%zu:%u ", z, y, x, static_cast<uint32_t>(data[{z, y, x}]));
}
};


//! Tests if the value of the buffer on index i is equal to i.
struct TestBufferKernel
{
template<typename TAcc, typename TData, typename TExtent>
template<typename TAcc, typename TData>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
TData const* const
#ifndef NDEBUG
data
#endif
,
TExtent const& extents,
size_t const
#ifndef NDEBUG
pitch
#endif
) const -> void
alpaka::experimental::BufferAccessor<TAcc, TData, 3, alpaka::experimental::ReadAccess> const data) const
-> void
{
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const globalThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

auto const linearizedGlobalThreadIdx = alpaka::mapIdx<1u>(globalThreadIdx, globalThreadExtent);

for(size_t i(linearizedGlobalThreadIdx[0]); i < extents.prod(); i += globalThreadExtent.prod())
{
ALPAKA_ASSERT_OFFLOAD(data[linIdxToPitchedIdx<2>(i, pitch)] == i);
}
auto const idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridSize = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for(size_t z = idx[0]; z < data.extents[0]; z += gridSize[0])
for(size_t y = idx[1]; y < data.extents[1]; y += gridSize[1])
for(size_t x = idx[2]; x < data.extents[2]; x += gridSize[2])
ALPAKA_ASSERT_OFFLOAD(
data(z, y, x) == alpaka::mapIdx<1u>(decltype(data.extents){z, y, x}, data.extents)[0]);
}
};

//! Fills values of buffer with increasing elements starting from 0
struct FillBufferKernel
{
template<typename TAcc, typename TData, typename TExtent>
ALPAKA_FN_ACC auto operator()(TAcc const& acc, TData* const data, TExtent const& extents) const -> void
template<typename TAcc, typename TData>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
alpaka::experimental::BufferAccessor<TAcc, TData, 3, alpaka::experimental::WriteAccess> const data) const
-> void
{
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const globalThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

auto const linearizedGlobalThreadIdx = alpaka::mapIdx<1u>(globalThreadIdx, globalThreadExtent);
auto const idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridSize = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for(size_t i(linearizedGlobalThreadIdx[0]); i < extents.prod(); i += globalThreadExtent.prod())
{
data[i] = static_cast<TData>(i);
}
for(size_t z = idx[0]; z < data.extents[0]; z += gridSize[0])
for(size_t y = idx[1]; y < data.extents[1]; y += gridSize[1])
for(size_t x = idx[2]; x < data.extents[2]; x += gridSize[2])
data(z, y, x) = alpaka::mapIdx<1u>(idx, data.extents)[0];
}
};

Expand Down Expand Up @@ -209,35 +195,25 @@ auto main() -> int

// Init host buffer
//
// You can not access the inner
// elements of a buffer directly, but
// you can get the pointer to the memory
// (getPtrNative).
Data* const pHostBuffer = alpaka::getPtrNative(hostBuffer);
// You can not access the inner elements of a buffer directly, but you can get the pointer to the memory via
// getPtrNative() or a read/write accessor using access().
auto hostBufferAccessor = alpaka::experimental::access(hostBuffer);

// This pointer can be used to directly write
// some values into the buffer memory.
// Mind, that only a host can write on host memory.
// The same holds true for device memory.
for(Idx i(0); i < extents.prod(); ++i)
{
pHostBuffer[i] = static_cast<Data>(i);
}
for(size_t z = 0; z < extents[0]; z++)
for(size_t y = 0; y < extents[1]; y++)
for(size_t x = 0; x < extents[2]; x++)
hostBufferAccessor(z, y, x) = static_cast<Data>(alpaka::mapIdx<1u>(Vec{z, y, x}, extents)[0]);

// Memory views and buffers can also be initialized by executing a kernel.
// To pass a buffer into a kernel, you can pass the
// native pointer into the kernel invocation.
Data* const pHostViewPlainPtr = alpaka::getPtrNative(hostViewPlainPtr);

FillBufferKernel fillBufferKernel;

alpaka::exec<Host>(
hostQueue,
hostWorkDiv,
fillBufferKernel,
pHostViewPlainPtr, // 1st kernel argument
extents); // 2nd kernel argument

alpaka::exec<Host>(hostQueue, hostWorkDiv, fillBufferKernel, alpaka::experimental::writeAccess(hostViewPlainPtr));

// Copy host to device Buffer
//
Expand All @@ -253,40 +229,15 @@ auto main() -> int
alpaka::memcpy(devQueue, deviceBuffer1, hostViewPlainPtr, extents);
alpaka::memcpy(devQueue, deviceBuffer2, hostBuffer, extents);

// Depending on the accelerator, the allocation function may introduce
// padding between rows/planes of multidimensional memory allocations.
// Therefore the pitch (distance between consecutive rows/planes) may be
// greater than the space required for the data.
Idx const deviceBuffer1Pitch(alpaka::getPitchBytes<2u>(deviceBuffer1) / sizeof(Data));
Idx const deviceBuffer2Pitch(alpaka::getPitchBytes<2u>(deviceBuffer2) / sizeof(Data));
Idx const hostBuffer1Pitch(alpaka::getPitchBytes<2u>(hostBuffer) / sizeof(Data));
Idx const hostViewPlainPtrPitch(alpaka::getPitchBytes<2u>(hostViewPlainPtr) / sizeof(Data));

// Test device Buffer
//
// This kernel tests if the copy operations
// were successful. In the case something
// went wrong an assert will fail.
Data const* const pDeviceBuffer1 = alpaka::getPtrNative(deviceBuffer1);
Data const* const pDeviceBuffer2 = alpaka::getPtrNative(deviceBuffer2);

TestBufferKernel testBufferKernel;
alpaka::exec<Acc>(
devQueue,
devWorkDiv,
testBufferKernel,
pDeviceBuffer1, // 1st kernel argument
extents, // 2nd kernel argument
deviceBuffer1Pitch); // 3rd kernel argument

alpaka::exec<Acc>(
devQueue,
devWorkDiv,
testBufferKernel,
pDeviceBuffer2, // 1st kernel argument
extents, // 2nd kernel argument
deviceBuffer2Pitch); // 3rd kernel argument

alpaka::exec<Acc>(devQueue, devWorkDiv, testBufferKernel, alpaka::experimental::readAccess(deviceBuffer1));
alpaka::exec<Acc>(devQueue, devWorkDiv, testBufferKernel, alpaka::experimental::readAccess(deviceBuffer2));

// Print device Buffer
//
Expand All @@ -299,43 +250,15 @@ auto main() -> int
// completely distorted.

PrintBufferKernel printBufferKernel;
alpaka::exec<Acc>(
devQueue,
devWorkDiv,
printBufferKernel,
pDeviceBuffer1, // 1st kernel argument
extents, // 2nd kernel argument
deviceBuffer1Pitch); // 3rd kernel argument
alpaka::wait(devQueue);
std::cout << std::endl;

alpaka::exec<Acc>(
devQueue,
devWorkDiv,
printBufferKernel,
pDeviceBuffer2, // 1st kernel argument
extents, // 2nd kernel argument
deviceBuffer2Pitch); // 3rd kernel argument
alpaka::exec<Acc>(devQueue, devWorkDiv, printBufferKernel, alpaka::experimental::readAccess(deviceBuffer1));
alpaka::wait(devQueue);
std::cout << std::endl;

alpaka::exec<Host>(
hostQueue,
hostWorkDiv,
printBufferKernel,
pHostBuffer, // 1st kernel argument
extents, // 2nd kernel argument
hostBuffer1Pitch); // 3rd kernel argument
alpaka::exec<Host>(hostQueue, hostWorkDiv, printBufferKernel, alpaka::experimental::readAccess(hostBuffer));
alpaka::wait(hostQueue);
std::cout << std::endl;

alpaka::exec<Host>(
hostQueue,
hostWorkDiv,
printBufferKernel,
pHostViewPlainPtr, // 1st kernel argument
extents, // 2nd kernel argument
hostViewPlainPtrPitch); // 3rd kernel argument
alpaka::exec<Host>(hostQueue, hostWorkDiv, printBufferKernel, alpaka::experimental::readAccess(hostViewPlainPtr));
alpaka::wait(hostQueue);
std::cout << std::endl;

Expand Down
30 changes: 18 additions & 12 deletions example/heatEquation/src/heatEquation.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
/* Copyright 2020 Benjamin Worpitz, Matthias Werner, Jakob Krude,
* Sergei Bastrakov
/* Copyright 2020-2021 Benjamin Worpitz, Matthias Werner, Jakob Krude, Sergei Bastrakov, Bernhard Manfred Gruber
*
* This file exemplifies usage of alpaka.
*
Expand Down Expand Up @@ -40,19 +39,20 @@

struct HeatEquationKernel
{
template<typename TAcc>
template<typename TAcc, typename TMemoryHandle, typename TIdx>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
double const* const uCurrBuf,
double* const uNextBuf,
uint32_t const extent,
alpaka::experimental::Accessor<TMemoryHandle, double, TIdx, 1, alpaka::experimental::ReadAccess> const
uCurrBuf,
alpaka::experimental::Accessor<TMemoryHandle, double, TIdx, 1, alpaka::experimental::WriteAccess> const
uNextBuf,
double const dx,
double const dt) const -> void
{
// Each kernel executes one element
double const r = dt / (dx * dx);
int idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
if(idx > 0 && idx < extent - 1u)
if(idx > 0 && idx < uNextBuf.extents[0] - 1u)
{
uNextBuf[idx] = uCurrBuf[idx] * (1.0 - 2.0 * r) + uCurrBuf[idx - 1] * r + uCurrBuf[idx + 1] * r;
}
Expand Down Expand Up @@ -146,9 +146,6 @@ auto main() -> int
auto uNextBufAcc = BufAcc{alpaka::allocBuf<double, Idx>(devAcc, extent)};
auto uCurrBufAcc = BufAcc{alpaka::allocBuf<double, Idx>(devAcc, extent)};

double* pCurrAcc = alpaka::getPtrNative(uCurrBufAcc);
double* pNextAcc = alpaka::getPtrNative(uNextBufAcc);

// Apply initial conditions for the test problem
for(uint32_t i = 0; i < numNodesX; i++)
{
Expand All @@ -163,15 +160,24 @@ auto main() -> int
alpaka::memcpy(queue, uNextBufAcc, uCurrBufAcc, extent);
alpaka::wait(queue);

auto* uCurrBufAccPtr = &uCurrBufAcc;
auto* uNextBufAccPtr = &uNextBufAcc;
for(uint32_t step = 0; step < numTimeSteps; step++)
{
// Compute next values
alpaka::exec<Acc>(queue, workdiv, kernel, pCurrAcc, pNextAcc, numNodesX, dx, dt);
alpaka::exec<Acc>(
queue,
workdiv,
kernel,
alpaka::experimental::readAccess(*uCurrBufAccPtr),
alpaka::experimental::writeAccess(*uNextBufAccPtr),
dx,
dt);

// We assume the boundary conditions are constant and so these values
// do not need to be updated.
// So we just swap next to curr (shallow copy)
std::swap(pCurrAcc, pNextAcc);
std::swap(uCurrBufAccPtr, uNextBufAccPtr);
}

// Copy device -> host
Expand Down
17 changes: 8 additions & 9 deletions example/monteCarloIntegration/src/monteCarloIntegration.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2020 Benjamin Worpitz, Sergei Bastrakov, Jakob Krude
/* Copyright 2020-2021 Benjamin Worpitz, Sergei Bastrakov, Jakob Krude, Bernhard Manfred Gruber
*
* This file exemplifies usage of alpaka.
*
Expand Down Expand Up @@ -47,11 +47,12 @@ struct Kernel
//! \param numPoints The total number of points to be calculated.
//! \param globalCounter The sum of all local results.
//! \param functor The function for which the integral is to be computed.
template<typename TAcc, typename TFunctor>
template<typename TAcc, typename TMemoryHandle, typename TIdx, typename TFunctor>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
size_t const numPoints,
uint32_t* const globalCounter,
alpaka::experimental::Accessor<TMemoryHandle, uint32_t, TIdx, 1, alpaka::experimental::ReadWriteAccess>
globalCounter,
TFunctor functor) const -> void
{
// Get the global linearized thread idx.
Expand Down Expand Up @@ -81,7 +82,7 @@ struct Kernel
}

// Add the local result to the sum of the other results.
alpaka::atomicAdd(acc, globalCounter, localCount, alpaka::hierarchy::Blocks{});
alpaka::atomicAdd(acc, &globalCounter[0], localCount, alpaka::hierarchy::Blocks{});
}
};

Expand Down Expand Up @@ -117,21 +118,19 @@ auto main() -> int

// Setup buffer.
BufHost bufHost{alpaka::allocBuf<uint32_t, Idx>(devHost, extent)};
uint32_t* const ptrBufHost{alpaka::getPtrNative(bufHost)};
BufAcc bufAcc{alpaka::allocBuf<uint32_t, Idx>(devAcc, extent)};
uint32_t* const ptrBufAcc{alpaka::getPtrNative(bufAcc)};

// Initialize the global count to 0.
ptrBufHost[0] = 0.0f;
alpaka::experimental::access(bufHost)[0] = 0.0f;
alpaka::memcpy(queue, bufAcc, bufHost, extent);

Kernel kernel;
alpaka::exec<Acc>(queue, workdiv, kernel, numPoints, ptrBufAcc, Function{});
alpaka::exec<Acc>(queue, workdiv, kernel, numPoints, alpaka::experimental::access(bufAcc), Function{});
alpaka::memcpy(queue, bufHost, bufAcc, extent);
alpaka::wait(queue);

// Check the result.
uint32_t globalCount = *ptrBufHost;
uint32_t globalCount = alpaka::experimental::access(bufHost)[0];

// Final result.
float finalResult = globalCount / static_cast<float>(numPoints);
Expand Down

0 comments on commit 21626fa

Please sign in to comment.