Skip to content

Commit

Permalink
Fix HIP/ROCm compilation some more
Browse files (browse the repository at this point in the history)
  • Loading branch information
oschuett committed Mar 12, 2022
1 parent e563fa0 commit 66eea6d
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 16 deletions.
3 changes: 0 additions & 3 deletions src/offload/offload_library.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@

#if defined(__OFFLOAD_PROFILING)
#if defined(__OFFLOAD_CUDA)
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvToolsExt.h>
#elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
#include <hip/hip_runtime_api.h>
#include <roctracer/roctx.h>
#endif
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/offload/offload_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#if defined(__OFFLOAD_CUDA)
#include <cuda_runtime.h>
#elif defined(__OFFLOAD_HIP)
#include <hip/hip_runtime_api.h>
#include <hip/hip_runtime.h>
#endif

#ifdef __cplusplus
Expand Down
8 changes: 4 additions & 4 deletions src/pw/gpu/pw_gpu_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,29 @@ extern "C" {

/*******************************************************************************
* \brief Launcher for pw_real_to_complex kernel.
* \author Ole Schütt
* \author Ole Schuett
******************************************************************************/
void pw_gpu_launch_real_to_complex(const double *din, double *zout,
const int ngpts, offloadStream_t stream);

/*******************************************************************************
* \brief Launcher for pw_complex_to_real kernel.
* \author Ole Schütt
* \author Ole Schuett
******************************************************************************/
void pw_gpu_launch_complex_to_real(const double *zin, double *dout,
const int ngpts, offloadStream_t stream);

/*******************************************************************************
* \brief Launcher for pw_gather_z kernel.
* \author Ole Schütt
* \author Ole Schuett
******************************************************************************/
void pw_gpu_launch_gather_z(double *pwcc, const double *c, const double scale,
const int ngpts, const int *ghatmap,
offloadStream_t stream);

/*******************************************************************************
* \brief Launcher for pw_scatter_z kernel.
* \author Ole Schütt
* \author Ole Schuett
******************************************************************************/
void pw_gpu_launch_scatter_z(double *c, const double *pwcc, const double scale,
const int ngpts, const int nmaps,
Expand Down
10 changes: 5 additions & 5 deletions src/pw/gpu/pw_gpu_kernels_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ __global__ void pw_real_to_complex(const double *din, double *zout,

/*******************************************************************************
* \brief Launcher for pw_real_to_complex kernel.
* \author Ole Schütt
* \author Ole Schuett
******************************************************************************/
void pw_gpu_launch_real_to_complex(const double *din, double *zout,
const int ngpts, cudaStream_t stream) {
const int ngpts, offloadStream_t stream) {
const int threadsPerBlock = 1024;
const int numBlocks = (ngpts + threadsPerBlock - 1) / threadsPerBlock;
pw_real_to_complex<<<numBlocks, threadsPerBlock, 0, stream>>>(din, zout,
Expand All @@ -58,7 +58,7 @@ __global__ void pw_complex_to_real(const double *zin, double *dout,

/*******************************************************************************
* \brief Launcher for pw_complex_to_real kernel.
* \author Ole Schütt
* \author Ole Schuett
******************************************************************************/
void pw_gpu_launch_complex_to_real(const double *zin, double *dout,
const int ngpts, offloadStream_t stream) {
Expand All @@ -83,7 +83,7 @@ __global__ void pw_gather_z(double *pwcc, const double *c, const double scale,

/*******************************************************************************
* \brief Launcher for pw_gather_z kernel.
* \author Ole Schütt
* \author Ole Schuett
******************************************************************************/
void pw_gpu_launch_gather_z(double *pwcc, const double *c, const double scale,
const int ngpts, const int *ghatmap,
Expand Down Expand Up @@ -114,7 +114,7 @@ __global__ void pw_scatter_z(double *c, const double *pwcc, const double scale,

/*******************************************************************************
* \brief Launcher for pw_scatter_z kernel.
* \author Ole Schütt
* \author Ole Schuett
******************************************************************************/
void pw_gpu_launch_scatter_z(double *c, const double *pwcc, const double scale,
const int ngpts, const int nmaps,
Expand Down
14 changes: 11 additions & 3 deletions tools/toolchain/scripts/generate_arch_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -161,30 +161,38 @@ if [ "${ENABLE_HIP}" = __TRUE__ ] && [ "${GPUVER}" != no ]; then
add_lib_from_paths HIP_LDFLAGS "libhipfft.*" $LIB_PATHS

PLATFORM_FLAGS=''
HIP_INCLUDES="-I${ROCM_PATH}/hip/include -I${ROCM_PATH}/hipblas/include -I${ROCM_PATH}/include"
HIP_INCLUDES="-I${ROCM_PATH}/include"
case "${GPUVER}" in
Mi50)
check_lib -lamdhip64 "hip"
add_lib_from_paths HIP_LDFLAGS "libamdhip64.*" $LIB_PATHS
check_lib -lhipfft "hip"
add_lib_from_paths HIP_LDFLAGS "libhipfft.*" $LIB_PATHS
check_lib -lrocblas "hip"
add_lib_from_paths HIP_LDFLAGS "librocblas.*" $LIB_PATHS
check_lib -lroctx64 "hip"
add_lib_from_paths HIP_LDFLAGS "libroctx64.*" $LIB_PATHS
check_lib -lroctracer64 "hip"
add_lib_from_paths HIP_LDFLAGS "libroctracer64.*" $LIB_PATHS
HIP_FLAGS+="-fPIE -D__HIP_PLATFORM_AMD__ -g --offload-arch=gfx906 -O3 --std=c++11 \$(DFLAGS)"
LIBS+=" IF_HIP(-lhipblas -lamdhip64 IF_DEBUG(-lroctx64 -lroctracer64|)|)"
LIBS+=" IF_HIP(-lamdhip64 -lhipfft -lhipblas -lrocblas IF_DEBUG(-lroctx64 -lroctracer64|)|)"
PLATFORM_FLAGS='-D__HIP_PLATFORM_AMD__'
DFLAGS+=' IF_HIP(-D__GRID_HIP -D__HIP_PLATFORM_AMD__ -D__PW_HIP -D__PW_GPU IF_DEBUG(-D__OFFLOAD_PROFILING|)|) -D__DBCSR_ACC'
CXXFLAGS+=" -fopenmp -std=c++11"
;;
Mi100)
check_lib -lamdhip64 "hip"
add_lib_from_paths HIP_LDFLAGS "libamdhip64.*" $LIB_PATHS
check_lib -lhipfft "hip"
add_lib_from_paths HIP_LDFLAGS "libhipfft.*" $LIB_PATHS
check_lib -lrocblas "hip"
add_lib_from_paths HIP_LDFLAGS "librocblas.*" $LIB_PATHS
check_lib -lroctx64 "hip"
add_lib_from_paths HIP_LDFLAGS "libroctx64.*" $LIB_PATHS
check_lib -lroctracer64 "hip"
add_lib_from_paths HIP_LDFLAGS "libroctracer64.*" $LIB_PATHS
HIP_FLAGS+="-fPIE -D__HIP_PLATFORM_AMD__ -g --offload-arch=gfx908 -O3 --std=c++11 \$(DFLAGS)"
LIBS+=" IF_HIP( -lhipblas -lamdhip64 IF_DEBUG(-lroctx64 -lroctracer64|)|)"
LIBS+=" IF_HIP(-lamdhip64 -lhipfft -lhipblas -lrocblas IF_DEBUG(-lroctx64 -lroctracer64|)|)"
PLATFORM_FLAGS='-D__HIP_PLATFORM_AMD__ '
DFLAGS+=' IF_HIP(-D__GRID_HIP -D__HIP_PLATFORM_AMD__ -D__PW_HIP -D__PW_GPU IF_DEBUG(-D__OFFLOAD_PROFILING|)|) -D__DBCSR_ACC'
CXXFLAGS+=" -fopenmp -std=c++11"
Expand Down

0 comments on commit 66eea6d

Please sign in to comment.