Skip to content

Commit

Permalink
Offload: Rename offload_set_device -> offload_activate_chosen_device
Browse files Browse the repository at this point in the history
  • Loading branch information
oschuett committed Mar 13, 2022
1 parent 6a44e63 commit 05cf8f2
Show file tree
Hide file tree
Showing 20 changed files with 95 additions and 97 deletions.
4 changes: 2 additions & 2 deletions src/cp_gemm_interface.F
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ MODULE cp_gemm_interface
USE input_constants, ONLY: do_cosma,&
do_scalapack
USE kinds, ONLY: dp
USE offload_api, ONLY: offload_set_device
USE offload_api, ONLY: offload_activate_chosen_device
#include "./base/base_uses.f90"

IMPLICIT NONE
Expand Down Expand Up @@ -88,7 +88,7 @@ SUBROUTINE cp_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
CASE (do_cosma)
#if defined(__COSMA)
CALL timeset(routineN//"_cosma", handle1)
CALL offload_set_device()
CALL offload_activate_chosen_device()
CALL cosma_pdgemm(transa=transa, transb=transb, m=m, n=n, k=k, alpha=alpha, &
matrix_a=matrix_a, matrix_b=matrix_b, beta=beta, matrix_c=matrix_c, &
a_first_col=a_first_col, &
Expand Down
4 changes: 2 additions & 2 deletions src/dbm/dbm_mempool.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ static void *actual_malloc(const size_t size, const bool on_device) {
#if defined(__DBM_CUDA)
if (on_device) {
void *memory;
offload_set_device();
offload_activate_chosen_device();
CHECK(cudaMalloc(&memory, size));
assert(memory != NULL);
return memory;
Expand All @@ -64,7 +64,7 @@ static void actual_free(void *memory, const bool on_device) {

#if defined(__DBM_CUDA)
if (on_device) {
offload_set_device();
offload_activate_chosen_device();
CHECK(cudaFree(memory));
return;
}
Expand Down
2 changes: 1 addition & 1 deletion src/dbm/dbm_miniapp.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ int main(int argc, char *argv[]) {
}

if (offload_get_device_count() > 0) {
offload_set_device_id(my_rank % offload_get_device_count());
offload_set_chosen_device(my_rank % offload_get_device_count());
}

// Create 2D cart.
Expand Down
10 changes: 5 additions & 5 deletions src/dbm/dbm_multiply_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void dbm_multiply_cuda_start(const int max_batch_size, const int nshards,
dbm_shard_t *shards_c_host,
dbm_multiply_cuda_context_t *ctx) {
// Select GPU device.
offload_set_device();
offload_activate_chosen_device();

ctx->nshards = nshards;
ctx->shards_c_host = shards_c_host;
Expand Down Expand Up @@ -108,7 +108,7 @@ void dbm_multiply_cuda_upload_packs(const dbm_pack_t *pack_a,
const dbm_pack_t *pack_b,
dbm_multiply_cuda_context_t *ctx) {
// Select GPU device.
offload_set_device();
offload_activate_chosen_device();

// Wait for all c-streams to complete before overwriting old packs.
cudaEvent_t event;
Expand Down Expand Up @@ -172,7 +172,7 @@ void dbm_multiply_cuda_process_batch(const int ntasks, const dbm_task_t *batch,
}

// Select GPU device.
offload_set_device();
offload_activate_chosen_device();

const dbm_shard_t *shard_c_host = &ctx->shards_c_host[kshard];
dbm_shard_cuda_t *shard_c_dev = &ctx->shards_c_dev[kshard];
Expand Down Expand Up @@ -224,7 +224,7 @@ void dbm_multiply_cuda_process_batch(const int ntasks, const dbm_task_t *batch,
******************************************************************************/
void dbm_multiply_cuda_download_results(dbm_multiply_cuda_context_t *ctx) {
// Select GPU device.
offload_set_device();
offload_activate_chosen_device();

#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < ctx->nshards; i++) {
Expand All @@ -247,7 +247,7 @@ void dbm_multiply_cuda_download_results(dbm_multiply_cuda_context_t *ctx) {
******************************************************************************/
void dbm_multiply_cuda_stop(dbm_multiply_cuda_context_t *ctx) {
// Select GPU device.
offload_set_device();
offload_activate_chosen_device();

// Wait for completion, then free cuda ressources.
#pragma omp parallel for schedule(dynamic)
Expand Down
4 changes: 2 additions & 2 deletions src/dbt/dbt_unittest.F
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ PROGRAM dbt_unittest
mp_world_finalize,&
mp_world_init
USE offload_api, ONLY: offload_get_device_count,&
offload_set_device_id
offload_set_chosen_device
#include "../base/base_uses.f90"

IMPLICIT NONE
Expand Down Expand Up @@ -63,7 +63,7 @@ PROGRAM dbt_unittest

! Select active offload device when available.
IF (offload_get_device_count() > 0) THEN
CALL offload_set_device_id(MOD(mynode, offload_get_device_count()))
CALL offload_set_chosen_device(MOD(mynode, offload_get_device_count()))
END IF

! set standard output parameters
Expand Down
4 changes: 2 additions & 2 deletions src/dbt/tas/dbt_tas_unittest.F
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ PROGRAM dbt_tas_unittest
mp_world_finalize,&
mp_world_init
USE offload_api, ONLY: offload_get_device_count,&
offload_set_device_id
offload_set_chosen_device
#include "../../base/base_uses.f90"

IMPLICIT NONE
Expand All @@ -55,7 +55,7 @@ PROGRAM dbt_tas_unittest

! Select active offload device when available.
IF (offload_get_device_count() > 0) THEN
CALL offload_set_device_id(MOD(mynode, offload_get_device_count()))
CALL offload_set_chosen_device(MOD(mynode, offload_get_device_count()))
END IF

io_unit = -1
Expand Down
8 changes: 4 additions & 4 deletions src/f77_interface.F
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ MODULE f77_interface
nnp_env_release, &
nnp_type
USE offload_api, ONLY: offload_get_device_count, &
offload_get_device_id, &
offload_set_device_id
offload_get_chosen_device, &
offload_set_chosen_device
USE periodic_table, ONLY: init_periodic_table
#if defined(__PW_GPU)
USE pw_gpu, ONLY: pw_gpu_finalize, &
Expand Down Expand Up @@ -276,7 +276,7 @@ SUBROUTINE init_cp2k(init_mpi, ierr)
! Select active offload device when available.
IF (offload_get_device_count() > 0) THEN
CALL mp_environ(numtask=numtask, taskid=taskid, groupid=mpi_comm_default)
CALL offload_set_device_id(MOD(taskid, offload_get_device_count()))
CALL offload_set_chosen_device(MOD(taskid, offload_get_device_count()))
END IF

! Initialize the DBCSR configuration
Expand All @@ -285,7 +285,7 @@ SUBROUTINE init_cp2k(init_mpi, ierr)
IF (offload_get_device_count() > 0) THEN
CALL dbcsr_init_lib(default_para_env%group, timeset_hook, timestop_hook, &
cp_abort_hook, cp_warn_hook, io_unit=unit_nr, &
accdrv_active_device_id=offload_get_device_id())
accdrv_active_device_id=offload_get_chosen_device())
ELSE
CALL dbcsr_init_lib(default_para_env%group, timeset_hook, timestop_hook, &
cp_abort_hook, cp_warn_hook, io_unit=unit_nr)
Expand Down
6 changes: 3 additions & 3 deletions src/grid/gpu/grid_gpu_task_list.cu
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ void grid_gpu_create_task_list(
const double dh_inv[][3][3], grid_gpu_task_list **task_list_out) {

// Select GPU device.
offload_set_device();
offload_activate_chosen_device();

if (*task_list_out != NULL) {
// This is actually an opportunity to reuse some buffers.
Expand Down Expand Up @@ -391,7 +391,7 @@ void grid_gpu_collocate_task_list(const grid_gpu_task_list *task_list,
offload_buffer *grids[]) {

// Select GPU device.
offload_set_device();
offload_activate_chosen_device();

// Upload blocks buffer using the main stream
CHECK(cudaMemcpyAsync(pab_blocks->device_buffer, pab_blocks->host_buffer,
Expand Down Expand Up @@ -467,7 +467,7 @@ void grid_gpu_integrate_task_list(const grid_gpu_task_list *task_list,
double forces[][3], double virial[3][3]) {

// Select GPU device.
offload_set_device();
offload_activate_chosen_device();

// Prepare shared buffers using the main stream
double *forces_dev = NULL;
Expand Down
2 changes: 1 addition & 1 deletion src/grid/grid_miniapp.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) {
return 1;
}

offload_set_device_id(0);
offload_set_chosen_device(0);
grid_library_init();

const double max_diff =
Expand Down
2 changes: 1 addition & 1 deletion src/grid/grid_unittest.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ int main(int argc, char *argv[]) {
return 1;
}

offload_set_device_id(0);
offload_set_chosen_device(0);
grid_library_init();

int errors = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/grid/hip/grid_hip_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ extern "C" void grid_hip_create_task_list(
// Select GPU device.
rocm_backend::context_info *ctx = nullptr;
if (*ctx_out == nullptr) {
ctx = new rocm_backend::context_info(offload_get_device_id());
ctx = new rocm_backend::context_info(offload_get_chosen_device());
*ctx_out = ctx;
} else {
ctx = *ctx_out;
Expand Down
48 changes: 24 additions & 24 deletions src/offload/offload_api.F
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ MODULE offload_api
CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'offload_api'

PUBLIC :: offload_get_device_count
PUBLIC :: offload_set_device_id, offload_get_device_id, offload_set_device
PUBLIC :: offload_set_chosen_device, offload_get_chosen_device, offload_activate_chosen_device
PUBLIC :: offload_timeset, offload_timestop, offload_mem_info
PUBLIC :: offload_buffer_type, offload_create_buffer, offload_free_buffer
PUBLIC :: offload_malloc_pinned_mem, offload_free_pinned_mem
Expand Down Expand Up @@ -107,63 +107,63 @@ END FUNCTION offload_get_device_count_c
END FUNCTION offload_get_device_count

! **************************************************************************************************
!> \brief Selects the device to be used.
!> \brief Selects the chosen device to be used.
!> \param device_id ...
!> \author Ole Schuett
! **************************************************************************************************
SUBROUTINE offload_set_device_id(device_id)
SUBROUTINE offload_set_chosen_device(device_id)
INTEGER, INTENT(IN) :: device_id

INTERFACE
SUBROUTINE offload_set_device_id_c(device_id) &
BIND(C, name="offload_set_device_id")
SUBROUTINE offload_set_chosen_device_c(device_id) &
BIND(C, name="offload_set_chosen_device")
IMPORT :: C_INT
INTEGER(KIND=C_INT), VALUE :: device_id
END SUBROUTINE offload_set_device_id_c
END SUBROUTINE offload_set_chosen_device_c
END INTERFACE

CALL offload_set_device_id_c(device_id=device_id)
CALL offload_set_chosen_device_c(device_id=device_id)

END SUBROUTINE offload_set_device_id
END SUBROUTINE offload_set_chosen_device

! **************************************************************************************************
!> \brief Returns the device to be used.
!> \brief Returns the chosen device.
!> \return ...
!> \author Ole Schuett
! **************************************************************************************************
FUNCTION offload_get_device_id() RESULT(device_id)
FUNCTION offload_get_chosen_device() RESULT(device_id)
INTEGER :: device_id

INTERFACE
FUNCTION offload_get_device_id_c() &
BIND(C, name="offload_get_device_id")
FUNCTION offload_get_chosen_device_c() &
BIND(C, name="offload_get_chosen_device")
IMPORT :: C_INT
INTEGER(KIND=C_INT) :: offload_get_device_id_c
END FUNCTION offload_get_device_id_c
INTEGER(KIND=C_INT) :: offload_get_chosen_device_c
END FUNCTION offload_get_chosen_device_c
END INTERFACE

device_id = offload_get_device_id_c()
device_id = offload_get_chosen_device_c()

IF (device_id < 0) &
CPABORT("Offload device not selected.")
CPABORT("No offload device has been chosen.")

END FUNCTION offload_get_device_id
END FUNCTION offload_get_chosen_device

! **************************************************************************************************
!> \brief Activates the device selected via offload_set_device_id()
!> \brief Activates the device selected via offload_set_chosen_device()
!> \author Ole Schuett
! **************************************************************************************************
SUBROUTINE offload_set_device()
SUBROUTINE offload_activate_chosen_device()

INTERFACE
SUBROUTINE offload_set_device_c() &
BIND(C, name="offload_set_device")
END SUBROUTINE offload_set_device_c
SUBROUTINE offload_activate_chosen_device_c() &
BIND(C, name="offload_activate_chosen_device")
END SUBROUTINE offload_activate_chosen_device_c
END INTERFACE

CALL offload_set_device_c()
CALL offload_activate_chosen_device_c()

END SUBROUTINE offload_set_device
END SUBROUTINE offload_activate_chosen_device

! **************************************************************************************************
!> \brief Starts a timing range.
Expand Down
4 changes: 2 additions & 2 deletions src/offload/offload_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ void offload_create_buffer(const int length, offload_buffer **buffer) {
#if defined(__OFFLOAD_CUDA)
// With size 0 cudaMallocHost doesn't null the pointer and cudaFreeHost fails.
(*buffer)->host_buffer = NULL;
offload_set_device();
offload_activate_chosen_device();
OFFLOAD_CHECK(
cudaMallocHost((void **)&(*buffer)->host_buffer, requested_size));
OFFLOAD_CHECK(cudaMalloc((void **)&(*buffer)->device_buffer, requested_size));
#elif defined(__OFFLOAD_HIP)
// With size 0 cudaMallocHost doesn't null the pointer and cudaFreeHost fails.
(*buffer)->host_buffer = NULL;
offload_set_device();
offload_activate_chosen_device();
OFFLOAD_CHECK(hipHostMalloc((void **)&(*buffer)->host_buffer, requested_size,
hipHostMallocDefault));
OFFLOAD_CHECK(hipMalloc((void **)&(*buffer)->device_buffer, requested_size));
Expand Down
18 changes: 9 additions & 9 deletions src/offload/offload_library.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#endif
#endif

static int current_device_id = -1;
static int chosen_device_id = -1;

const uint32_t colormap[] = {0xFFFFFF00, // Yellow
0xFFFF00FF, // Fuchsia
Expand Down Expand Up @@ -54,26 +54,26 @@ int offload_get_device_count(void) {
}

/*******************************************************************************
* \brief Selects the device to be used.
* \brief Selects the chosen device to be used.
* \author Ole Schuett
******************************************************************************/
void offload_set_device_id(int device_id) { current_device_id = device_id; }
void offload_set_chosen_device(int device_id) { chosen_device_id = device_id; }

/*******************************************************************************
* \brief Returns the device to be used.
* \brief Returns the chosen device.
* \author Ole Schuett
******************************************************************************/
int offload_get_device_id(void) { return current_device_id; }
int offload_get_chosen_device(void) { return chosen_device_id; }

/*******************************************************************************
* \brief Activates the device selected via offload_set_device_id()
* \brief Activates the device selected via offload_set_chosen_device()
* \author Ole Schuett
******************************************************************************/
void offload_set_device(void) {
void offload_activate_chosen_device(void) {
#ifdef __OFFLOAD_CUDA
OFFLOAD_CHECK(cudaSetDevice(current_device_id));
OFFLOAD_CHECK(cudaSetDevice(chosen_device_id));
#elif defined(__OFFLOAD_HIP)
OFFLOAD_CHECK(hipSetDevice(current_device_id));
OFFLOAD_CHECK(hipSetDevice(chosen_device_id));
#endif
}

Expand Down

0 comments on commit 05cf8f2

Please sign in to comment.