Add View::extents()
bernhardmgruber committed Feb 27, 2023
1 parent da10cd8 commit dfd7fac
Showing 14 changed files with 43 additions and 37 deletions.
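The change adds a View::extents() shorthand that forwards to mapping().extents() and updates all call sites accordingly. A minimal before/after sketch of a typical call site (the variable names are illustrative, not taken from the diff):

// before: go through the mapping to query the array extents
const auto n = view.mapping().extents()[0];
// after: ask the view directly
const auto n = view.extents()[0];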
8 changes: 4 additions & 4 deletions examples/alpaka/asyncblur/asyncblur.cpp
@@ -88,8 +88,8 @@ struct BlurKernel
const int bStart[2]
= {bi[0] * ElemsPerBlock + threadIdxInBlock[0], bi[1] * ElemsPerBlock + threadIdxInBlock[1]};
const int bEnd[2] = {
- alpaka::math::min(acc, bStart[0] + ElemsPerBlock + 2 * KernelSize, oldImage.mapping().extents()[0]),
- alpaka::math::min(acc, bStart[1] + ElemsPerBlock + 2 * KernelSize, oldImage.mapping().extents()[1]),
+ alpaka::math::min(acc, bStart[0] + ElemsPerBlock + 2 * KernelSize, oldImage.extents()[0]),
+ alpaka::math::min(acc, bStart[1] + ElemsPerBlock + 2 * KernelSize, oldImage.extents()[1]),
};
LLAMA_INDEPENDENT_DATA
for(auto y = bStart[0]; y < bEnd[0]; y += threadsPerBlock)
@@ -102,8 +102,8 @@ struct BlurKernel

const int start[2] = {ti[0] * Elems, ti[1] * Elems};
const int end[2] = {
- alpaka::math::min(acc, start[0] + Elems, oldImage.mapping().extents()[0] - 2 * KernelSize),
- alpaka::math::min(acc, start[1] + Elems, oldImage.mapping().extents()[1] - 2 * KernelSize),
+ alpaka::math::min(acc, start[0] + Elems, oldImage.extents()[0] - 2 * KernelSize),
+ alpaka::math::min(acc, start[1] + Elems, oldImage.extents()[1] - 2 * KernelSize),
};

LLAMA_INDEPENDENT_DATA
10 changes: 5 additions & 5 deletions examples/alpaka/pic/pic.cpp
@@ -193,10 +193,10 @@ void output(int n, const ParticleView& particles)
};
auto addFloat = [&](float f) { buffer.push_back(swapBytes(f)); };

- const auto pointCount = particles.mapping().extents()[0];
+ const auto pointCount = particles.extents()[0];
outP << "POINTS " << pointCount << " float\n";
buffer.reserve(pointCount * 3);
- for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+ for(auto i : llama::ArrayIndexRange{particles.extents()})
{
auto p = particles(i);
addFloat(0);
@@ -207,7 +207,7 @@ void output(int n, const ParticleView& particles)

outP << "POINT_DATA " << pointCount << "\nVECTORS velocity float\n";
buffer.clear();
- for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+ for(auto i : llama::ArrayIndexRange{particles.extents()})
{
auto p = particles(i);
addFloat(p(U{}, Z{}));
@@ -218,13 +218,13 @@ void output(int n, const ParticleView& particles)

outP << "SCALARS q float 1\nLOOKUP_TABLE default\n";
buffer.clear();
- for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+ for(auto i : llama::ArrayIndexRange{particles.extents()})
addFloat(particles(i)(Q{}));
flushBuffer();

outP << "SCALARS m float 1\nLOOKUP_TABLE default\n";
buffer.clear();
- for(auto i : llama::ArrayIndexRange{particles.mapping().extents()})
+ for(auto i : llama::ArrayIndexRange{particles.extents()})
addFloat(particles(i)(M{}));
flushBuffer();
}
2 changes: 1 addition & 1 deletion examples/alpaka/vectoradd/vectoradd.cpp
@@ -53,7 +53,7 @@ struct ComputeKernel
LLAMA_FN_HOST_ACC_INLINE void operator()(const Acc& acc, View a, View b) const
{
const auto ti = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
- const auto [n] = a.mapping().extents();
+ const auto [n] = a.extents();
const auto start = ti * Elems;
const auto end = alpaka::math::min(acc, start + Elems, n);

2 changes: 1 addition & 1 deletion examples/memmap/memmap.cpp
@@ -40,7 +40,7 @@ auto computeCentroid(const View& triangles)
llama::One<Vertex> centroid{};
for(const auto& t : triangles)
centroid += t(tag::a) + t(tag::b) + t(tag::c);
- return centroid / triangles.mapping().extents()[0] / 3;
+ return centroid / triangles.extents()[0] / 3;
}

auto main(int argc, const char* argv[]) -> int
2 changes: 1 addition & 1 deletion examples/root/lhcb_analysis/lhcb.cpp
@@ -176,7 +176,7 @@ namespace
auto hists = std::vector<TH1D>(omp_get_max_threads(), TH1D("B_mass", mappingName.c_str(), 500, 5050, 5500));

auto begin = std::chrono::steady_clock::now();
- const RE::NTupleSize_t n = view.mapping().extents()[0];
+ const RE::NTupleSize_t n = view.extents()[0];
#pragma omp parallel for
for(RE::NTupleSize_t i = 0; i < n; i++)
{
2 changes: 1 addition & 1 deletion examples/vectoradd/vectoradd.cpp
@@ -38,7 +38,7 @@ namespace usellama
template<typename View>
[[gnu::noinline]] void compute(const View& a, const View& b, View& c)
{
- const auto [n] = c.mapping().extents();
+ const auto [n] = c.extents();

for(std::size_t i = 0; i < n; i++)
{
6 changes: 3 additions & 3 deletions examples/viewcopy/viewcopy.cpp
@@ -53,8 +53,8 @@ void stdCopy(const llama::View<SrcMapping, SrcBlobType>& srcView, llama::View<Ds
{
static_assert(std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>);

- if(srcView.mapping().extents() != dstView.mapping().extents())
- throw std::runtime_error{"Array dimensions sizes are different"};
+ if(srcView.extents() != dstView.extents())
+ throw std::runtime_error{"Array extents are different"};

std::copy(srcView.begin(), srcView.end(), dstView.begin());
}
@@ -106,7 +106,7 @@ template<typename Mapping, typename BlobType>
auto hash(const llama::View<Mapping, BlobType>& view)
{
std::size_t acc = 0;
- for(auto ad : llama::ArrayIndexRange{view.mapping().extents()})
+ for(auto ad : llama::ArrayIndexRange{view.extents()})
llama::forEachLeafCoord<typename Mapping::RecordDim>([&](auto rc) { boost::hash_combine(acc, view(ad)(rc)); });
return acc;
}
11 changes: 6 additions & 5 deletions include/llama/BlobAllocators.hpp
@@ -149,6 +149,12 @@ namespace llama::bloballoc
/// on the view before passing it to the kernel.
struct CudaMalloc
{
+ inline static const auto deleter = [](void* p)
+ {
+ if(const auto code = cudaFree(p); code != cudaSuccess)
+ throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code));
+ };

template<std::size_t FieldAlignment>
inline auto operator()(std::integral_constant<std::size_t, FieldAlignment>, std::size_t count) const
{
@@ -157,11 +163,6 @@ namespace llama::bloballoc
throw std::runtime_error(std::string{"cudaMalloc failed with code "} + cudaGetErrorString(code));
if(reinterpret_cast<std::uintptr_t>(p) & (FieldAlignment - 1 != 0u))
throw std::runtime_error{"cudaMalloc does not align sufficiently"};
- auto deleter = [](void* p)
- {
- if(const auto code = cudaFree(p); code != cudaSuccess)
- throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code));
- };
return std::unique_ptr<std::byte[], decltype(deleter)>(p, deleter);
}
};
10 changes: 5 additions & 5 deletions include/llama/Copy.hpp
@@ -57,7 +57,7 @@ namespace llama
internal::assertTrivialCopyable<typename Mapping::RecordDim>();

// TODO(bgruber): we do not verify if the mappings have other runtime state than the array dimensions
- if(srcView.mapping().extents() != dstView.mapping().extents())
+ if(srcView.extents() != dstView.extents())
throw std::runtime_error{"Array dimensions sizes are different"};

// TODO(bgruber): this is maybe not the best parallel copying strategy
@@ -85,7 +85,7 @@ namespace llama
std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>,
"The source and destination record dimensions must be the same");

- if(srcView.mapping().extents() != dstView.mapping().extents())
+ if(srcView.extents() != dstView.extents())
throw std::runtime_error{"Array dimensions sizes are different"};

auto copyOne = [&](auto ai) LLAMA_LAMBDA_INLINE
@@ -95,7 +95,7 @@ namespace llama
};

constexpr auto dims = SrcMapping::ArrayExtents::rank;
- const auto extents = srcView.mapping().extents().toArray();
+ const auto extents = srcView.extents().toArray();
const auto workPerThread = (extents[0] + threadCount - 1) / threadCount;
const auto start = threadId * workPerThread;
const auto end = std::min((threadId + 1) * workPerThread, static_cast<std::size_t>(extents[0]));
@@ -162,7 +162,7 @@ namespace llama
static constexpr auto lanesSrc = internal::aosoaLanes<SrcMapping>;
static constexpr auto lanesDst = internal::aosoaLanes<DstMapping>;

- if(srcView.mapping().extents() != dstView.mapping().extents())
+ if(srcView.extents() != dstView.extents())
throw std::runtime_error{"Array dimensions sizes are different"};

static constexpr auto srcIsAoSoA = lanesSrc != std::numeric_limits<std::size_t>::max();
@@ -176,7 +176,7 @@ namespace llama
!dstIsAoSoA || std::tuple_size_v<decltype(dstView.storageBlobs)> == 1,
"Implementation assumes AoSoA with single blob");

- const auto flatSize = product(dstView.mapping().extents());
+ const auto flatSize = product(dstView.extents());

// TODO(bgruber): implement the following by adding additional copy loops for the remaining elements
if(!srcIsAoSoA && flatSize % lanesDst != 0)
4 changes: 2 additions & 2 deletions include/llama/Simd.hpp
@@ -206,7 +206,7 @@ namespace llama
}
else
{
- auto b = ArrayIndexIterator{srcRef.view.mapping().extents(), srcRef.arrayIndex()};
+ auto b = ArrayIndexIterator{srcRef.view.extents(), srcRef.arrayIndex()};
ElementSimd elemSimd; // g++-12 really needs the intermediate elemSimd and memcpy
for(auto i = 0; i < Traits::lanes; i++)
reinterpret_cast<FieldType*>(&elemSimd)[i]
@@ -248,7 +248,7 @@ namespace llama
// TODO(bgruber): how does this generalize conceptually to 2D and higher dimensions? in which
// direction should we collect SIMD values?
const ElementSimd elemSimd = srcSimd(rc);
- auto b = ArrayIndexIterator{dstRef.view.mapping().extents(), dstRef.arrayIndex()};
+ auto b = ArrayIndexIterator{dstRef.view.extents(), dstRef.arrayIndex()};
for(auto i = 0; i < Traits::lanes; i++)
dstRef.view (*b++)(cat(typename T::BoundRecordCoord{}, rc))
= reinterpret_cast<const FieldType*>(&elemSimd)[i]; // scalar store
2 changes: 1 addition & 1 deletion include/llama/Vector.hpp
@@ -182,7 +182,7 @@ namespace llama

LLAMA_FN_HOST_ACC_INLINE auto capacity() const -> size_type
{
- return m_view.mapping().extents()[0];
+ return m_view.extents()[0];
}

// NOLINTNEXTLINE(readability-identifier-naming)
15 changes: 10 additions & 5 deletions include/llama/View.hpp
@@ -127,7 +127,7 @@ namespace llama
using View = View<Mapping, BlobType, Accessor>;
using RecordDim = typename View::RecordDim;
forEachADCoord(
- view.mapping().extents(),
+ view.extents(),
[&]([[maybe_unused]] typename View::ArrayIndex ai)
{
if constexpr(isRecordDim<RecordDim>)
@@ -458,6 +458,11 @@ namespace llama
return static_cast<const Mapping&>(*this);
}

+ LLAMA_FN_HOST_ACC_INLINE auto extents() const -> ArrayExtents
+ {
+ return mapping().extents();
+ }

LLAMA_FN_HOST_ACC_INLINE auto accessor() -> Accessor&
{
return static_cast<Accessor&>(*this);
@@ -569,25 +574,25 @@
LLAMA_FN_HOST_ACC_INLINE
auto begin() -> iterator
{
- return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this};
+ return {ArrayIndexRange<ArrayExtents>{extents()}.begin(), this};
}

LLAMA_FN_HOST_ACC_INLINE
auto begin() const -> const_iterator
{
- return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this};
+ return {ArrayIndexRange<ArrayExtents>{extents()}.begin(), this};
}

LLAMA_FN_HOST_ACC_INLINE
auto end() -> iterator
{
- return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this};
+ return {ArrayIndexRange<ArrayExtents>{extents()}.end(), this};
}

LLAMA_FN_HOST_ACC_INLINE
auto end() const -> const_iterator
{
- return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this};
+ return {ArrayIndexRange<ArrayExtents>{extents()}.end(), this};
}

Array<BlobType, Mapping::blobCount> storageBlobs;
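With the member in place, iterating over all array indices no longer needs to go through the mapping. A short usage sketch, assuming a view whose record dimension has a field tag X (the tag and the loop body are illustrative):

// visit every array index of the view and write one field
for(auto ai : llama::ArrayIndexRange{view.extents()})
    view(ai)(X{}) = 0.0f;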
4 changes: 2 additions & 2 deletions tests/common.hpp
@@ -144,7 +144,7 @@ void iotaFillView(View& view)
{
std::int64_t value = 0;
using RecordDim = typename View::RecordDim;
- for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+ for(auto ai : llama::ArrayIndexRange{view.extents()})
{
if constexpr(llama::isRecordDim<RecordDim>)
{
@@ -169,7 +169,7 @@
{
std::int64_t value = 0;
using RecordDim = typename View::RecordDim;
- for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+ for(auto ai : llama::ArrayIndexRange{view.extents()})
{
if constexpr(llama::isRecordDim<RecordDim>)
{
2 changes: 1 addition & 1 deletion tests/mapping.Null.cpp
@@ -13,7 +13,7 @@ TEST_CASE("mapping.Null")
auto view = llama::allocView(mapping);
iotaFillView(view);

- for(auto ai : llama::ArrayIndexRange{view.mapping().extents()})
+ for(auto ai : llama::ArrayIndexRange{view.extents()})
llama::forEachLeafCoord<Particle>(
[&](auto rc)
{
