Skip to content

Commit

Permalink
Add HIP managed memory support, resolves #162
Browse files Browse the repository at this point in the history
  • Loading branch information
tom91136 committed Sep 25, 2023
1 parent bd6bb09 commit 369785c
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file.
### Added
- Ability to build Kokkos and RAJA versions against existing packages.
- Thrust managed memory.
- HIP managed memory.
- New implementation using SYCL2020 USM (sycl2020-acc) and renamed original `sycl2020` to `sycl2020-acc`.

### Changed
Expand Down
2 changes: 2 additions & 0 deletions src/ci-test-compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ build_hip() {
local name="hip_build"

run_build $name "${HIP_CXX:?}" hip "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}"
run_build $name "${HIP_CXX:?}" hip "-DCMAKE_CXX_COMPILER=${HIP_CXX:?} -DMEM=MANAGED"
run_build $name "${HIP_CXX:?}" hip "-DCMAKE_CXX_COMPILER=${HIP_CXX:?} -DMEM=PAGEFAULT"

run_build $name "${GCC_CXX:?}" thrust "-DCMAKE_CXX_COMPILER=${HIP_CXX:?} -DSDK_DIR=$ROCM_PATH -DTHRUST_IMPL=ROCM"
}
Expand Down
43 changes: 39 additions & 4 deletions src/hip/HIPStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,22 @@ HIPStream<T>::HIPStream(const int ARRAY_SIZE, const int device_index)
// Print out device information
std::cout << "Using HIP device " << getDeviceName(device_index) << std::endl;
std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl;
#if defined(MANAGED)
std::cout << "Memory: MANAGED" << std::endl;
#elif defined(PAGEFAULT)
std::cout << "Memory: PAGEFAULT" << std::endl;
#else
std::cout << "Memory: DEFAULT" << std::endl;
#endif

array_size = ARRAY_SIZE;
// Round dot_num_blocks up to next multiple of (TBSIZE * dot_elements_per_lane)
dot_num_blocks = (array_size + (TBSIZE * dot_elements_per_lane - 1)) / (TBSIZE * dot_elements_per_lane);

size_t array_bytes = sizeof(T);
array_bytes *= ARRAY_SIZE;
size_t total_bytes = array_bytes * 3;

// Allocate the host array for partial sums for dot kernels using hipHostMalloc.
// This creates an array on the host which is visible to the device. However, it requires
// synchronization (e.g. hipDeviceSynchronize) for the result to be available on the host
Expand All @@ -63,13 +74,26 @@ HIPStream<T>::HIPStream(const int ARRAY_SIZE, const int device_index)
if (props.totalGlobalMem < std::size_t{3}*ARRAY_SIZE*sizeof(T))
throw std::runtime_error("Device does not have enough memory for all 3 buffers");

// Create device buffers
hipMalloc(&d_a, ARRAY_SIZE*sizeof(T));
// Create device buffers
#if defined(MANAGED)
hipMallocManaged(&d_a, array_bytes);
check_error();
hipMallocManaged(&d_b, array_bytes);
check_error();
hipMallocManaged(&d_c, array_bytes);
check_error();
#elif defined(PAGEFAULT)
d_a = (T*)malloc(array_bytes);
d_b = (T*)malloc(array_bytes);
d_c = (T*)malloc(array_bytes);
#else
hipMalloc(&d_a, array_bytes);
check_error();
hipMalloc(&d_b, ARRAY_SIZE*sizeof(T));
hipMalloc(&d_b, array_bytes);
check_error();
hipMalloc(&d_c, ARRAY_SIZE*sizeof(T));
hipMalloc(&d_c, array_bytes);
check_error();
#endif
}


Expand Down Expand Up @@ -109,13 +133,24 @@ void HIPStream<T>::init_arrays(T initA, T initB, T initC)
template <class T>
void HIPStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
{

// Copy device memory to host
#if defined(PAGEFAULT) || defined(MANAGED)
hipDeviceSynchronize();
for (int i = 0; i < array_size; i++)
{
a[i] = d_a[i];
b[i] = d_b[i];
c[i] = d_c[i];
}
#else
hipMemcpy(a.data(), d_a, a.size()*sizeof(T), hipMemcpyDeviceToHost);
check_error();
hipMemcpy(b.data(), d_b, b.size()*sizeof(T), hipMemcpyDeviceToHost);
check_error();
hipMemcpy(c.data(), d_c, c.size()*sizeof(T), hipMemcpyDeviceToHost);
check_error();
#endif
}

template <typename T>
Expand Down
7 changes: 7 additions & 0 deletions src/hip/model.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
register_flag_required(CMAKE_CXX_COMPILER
"Absolute path to the AMD HIP C++ compiler")

register_flag_optional(MEM "Device memory mode:
DEFAULT - allocate host and device memory pointers.
MANAGED - use HIP Managed Memory.
PAGEFAULT - shared memory, only host pointers allocated."
"DEFAULT")

macro(setup)
# nothing to do here as hipcc does everything correctly, what a surprise!
register_definitions(${MEM})
endmacro()

0 comments on commit 369785c

Please sign in to comment.