Skip to content

Commit

Permalink
Fixes #604,#605:
Browse files Browse the repository at this point in the history
* `memory::region_t`'s are now CUDA-independent!
* `memory::const_region_t`'s constructor from a `region_t` now takes its argument by value rather than by const reference, so that rvalue-references are also supported
  • Loading branch information
eyalroz committed Mar 10, 2024
1 parent 0928a0c commit a48ee8b
Show file tree
Hide file tree
Showing 8 changed files with 26 additions and 29 deletions.
2 changes: 0 additions & 2 deletions src/cuda/api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#endif

#include "api/types.hpp"
#include "cuda/api/region.hpp"

#include "api/pci_id.hpp"
#include "api/constants.hpp"
#include "api/error.hpp"
Expand Down
32 changes: 14 additions & 18 deletions src/cuda/api/region.hpp → src/cuda/api/detail/region.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,32 @@
* @file
*
* @brief A memory region class (@ref `cuda::memory::region`) and related
* functinality.
* functionality.
*
* @note There is no CUDA-specific functionality here, and this class could be
* used irrespective of the CUDA APIs and GPUs in general.
*/

#pragma once
#ifndef CUDA_API_WRAPPERS_REGION_HPP_
#define CUDA_API_WRAPPERS_REGION_HPP_

#if (__cplusplus < 201103L && (!defined(_MSVC_LANG) || _MSVC_LANG < 201103L))
#error "The CUDA API headers can only be compiled with C++11 or a later version of the C++ language standard"
#include <type_traits>
#include <stdexcept>

// NOEXCEPT_IF_NDEBUG expands to noexcept(true) when NDEBUG is defined, and to
// noexcept(false) otherwise. The outer #ifndef leaves untouched any definition
// of the macro which already exists at this point.
#ifndef NOEXCEPT_IF_NDEBUG
#ifdef NDEBUG
#define NOEXCEPT_IF_NDEBUG noexcept(true)
#else
#define NOEXCEPT_IF_NDEBUG noexcept(false)
#endif
#endif // NOEXCEPT_IF_NDEBUG

#include "types.hpp"

namespace cuda {

namespace memory {

namespace device {

using address_t = CUdeviceptr;

} // namespace device

namespace detail_ {

// Note: T should be either void or void const, nothing else
Expand All @@ -48,8 +51,6 @@ class base_region_t {
base_region_t() noexcept = default;
base_region_t(pointer start, size_type size_in_bytes) noexcept
: start_(start), size_in_bytes_(size_in_bytes) {}
base_region_t(device::address_t start, size_type size_in_bytes) noexcept
: start_(as_pointer(start)), size_in_bytes_(size_in_bytes) {}

template <typename U>
base_region_t(span<U> span) noexcept : start_(span.data()), size_in_bytes_(span.size() * sizeof(U))
Expand Down Expand Up @@ -87,11 +88,6 @@ class base_region_t {
pointer data() const noexcept { return start(); }
pointer get() const noexcept { return start(); }

device::address_t device_address() const noexcept
{
return device::address(start_);
}

protected:
base_region_t subregion(size_type offset_in_bytes, size_type size_in_bytes) const
#ifdef NDEBUG
Expand Down Expand Up @@ -136,7 +132,7 @@ struct region_t : public detail_::base_region_t<void> {

struct const_region_t : public detail_::base_region_t<void const> {
using base_region_t<void const>::base_region_t;
const_region_t(const region_t& r) : base_region_t(r.start(), r.size()) {}
const_region_t(region_t r) : base_region_t(r.start(), r.size()) {}
const_region_t subregion(size_t offset_in_bytes, size_t size_in_bytes) const
{
auto parent_class_subregion = base_region_t<void const>::subregion(offset_in_bytes, size_in_bytes);
Expand Down
1 change: 0 additions & 1 deletion src/cuda/api/error.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#define CUDA_API_WRAPPERS_ERROR_HPP_

#include "types.hpp"
#include "region.hpp"
#include <cuda_runtime_api.h>

#include <type_traits>
Expand Down
1 change: 0 additions & 1 deletion src/cuda/api/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
#include "error.hpp"
#include "pointer.hpp"
#include "current_context.hpp"
#include "region.hpp"
#include "unique_span.hpp"

// The following is needed for cudaGetSymbolAddress, cudaGetSymbolSize
Expand Down
2 changes: 1 addition & 1 deletion src/cuda/api/multi_wrapper_impls/virtual_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ inline void set_access_mode(
{
CUmemAccessDesc desc { { CU_MEM_LOCATION_TYPE_DEVICE, device.id() }, CUmemAccess_flags(access_mode) };
static constexpr const size_t count { 1 };
auto result = cuMemSetAccess(fully_mapped_region.device_address(), fully_mapped_region.size(), &desc, count);
auto result = cuMemSetAccess(device::address(fully_mapped_region), fully_mapped_region.size(), &desc, count);
throw_if_error_lazy(result, "Failed setting the access mode to the virtual memory mapping to the range of size "
+ ::std::to_string(fully_mapped_region.size()) + " bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data()));
}
Expand Down
5 changes: 5 additions & 0 deletions src/cuda/api/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include "detail/optional.hpp"
#include "detail/span.hpp"
#include "detail/region.hpp"

#ifndef __CUDACC__
#include <builtin_types.h>
Expand Down Expand Up @@ -56,11 +57,13 @@
#endif
#endif

// NOEXCEPT_IF_NDEBUG expands to noexcept(true) when NDEBUG is defined, and to
// noexcept(false) otherwise. The outer #ifndef leaves untouched any definition
// of the macro which already exists at this point.
#ifndef NOEXCEPT_IF_NDEBUG
#ifdef NDEBUG
#define NOEXCEPT_IF_NDEBUG noexcept(true)
#else
#define NOEXCEPT_IF_NDEBUG noexcept(false)
#endif
#endif // NOEXCEPT_IF_NDEBUG

#ifdef _MSC_VER
/*
Expand Down Expand Up @@ -624,6 +627,8 @@ inline address_t address(const void* device_ptr) noexcept
return reinterpret_cast<address_t>(device_ptr);
}

/**
 * @brief Obtain the address of the beginning of a memory region, as an
 * @ref address_t value.
 *
 * @param region The region whose starting address is requested; it is
 * taken by value.
 * @return the result of applying the pointer-taking overload of `address()`
 * to `region.start()`
 */
inline address_t address(memory::const_region_t region) noexcept { return address(region.start()); }

} // namespace device

inline void* as_pointer(device::address_t address) noexcept
Expand Down
2 changes: 1 addition & 1 deletion src/cuda/api/unique_span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#define CUDA_API_WRAPPERS_UNIQUE_SPAN_HPP_

#include "cuda/api/detail/span.hpp"
#include "region.hpp"
#include "detail/region.hpp"

#include <type_traits>
#include <memory>
Expand Down
10 changes: 5 additions & 5 deletions src/cuda/api/virtual_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,11 @@ inline reserved_address_range_t reserve(region_t requested_region, alignment_t a
{
unsigned long flags { 0 };
CUdeviceptr ptr;
auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment, requested_region.device_address(), flags);
auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment, device::address(requested_region), flags);
throw_if_error_lazy(status, "Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
+ " with alignment value " + ::std::to_string(alignment));
bool is_owning { true };
return detail_::wrap(memory::region_t { ptr, requested_region.size() }, alignment, is_owning);
return detail_::wrap(memory::region_t {as_pointer(ptr), requested_region.size() }, alignment, is_owning);
}

inline reserved_address_range_t reserve(size_t requested_size, alignment_t alignment = alignment::default_)
Expand Down Expand Up @@ -337,7 +337,7 @@ inline access_permissions_t get_access_mode(region_t fully_mapped_region, cuda::
{
CUmemLocation_st location { CU_MEM_LOCATION_TYPE_DEVICE, device_id };
unsigned long long flags;
auto result = cuMemGetAccess(&flags, &location, fully_mapped_region.device_address() );
auto result = cuMemGetAccess(&flags, &location, device::address(fully_mapped_region) );
throw_if_error_lazy(result, "Failed determining the access mode for "
+ cuda::device::detail_::identify(device_id)
+ " to the virtual memory mapping to the range of size "
Expand Down Expand Up @@ -451,7 +451,7 @@ class mapping_t {
~mapping_t() noexcept(false)
{
if (not owning_) { return; }
auto result = cuMemUnmap(address_range_.device_address(), address_range_.size());
auto result = cuMemUnmap(device::address(address_range_), address_range_.size());
throw_if_error_lazy(result, "Failed unmapping " + mapping::detail_::identify(address_range_));
}

Expand Down Expand Up @@ -499,7 +499,7 @@ inline mapping_t map(region_t region, physical_allocation_t physical_allocation)
size_t offset_into_allocation { 0 }; // not yet supported, but in the API
constexpr const unsigned long long flags { 0 };
auto handle = physical_allocation.handle();
auto status = cuMemMap(region.device_address(), region.size(), offset_into_allocation, handle, flags);
auto status = cuMemMap(device::address(region), region.size(), offset_into_allocation, handle, flags);
throw_if_error_lazy(status, "Failed making a virtual memory mapping of "
+ physical_allocation::detail_::identify(physical_allocation)
+ " to the range of size " + ::std::to_string(region.size()) + " bytes at " +
Expand Down

0 comments on commit a48ee8b

Please sign in to comment.