Skip to content
Permalink
Browse files

Prefix PVFMM_ to remaining preprocessor macros

  • Loading branch information...
dmalhotra committed Mar 31, 2019
1 parent 979d503 commit 6cd67bdc77a870e75f879fc1f3a266b5b97b38fd
@@ -16,16 +16,11 @@ along with this program; see the file COPYING. If not, write to the Free
Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA. */

#ifndef _BLAS_H_
#define _BLAS_H_
#ifndef _PVFMM_BLAS_H_
#define _PVFMM_BLAS_H_

extern "C"
{
/*! DAXPY computes y := alpha * x + y where alpha is a scalar and x and y are n-vectors.
* See http://www.netlib.org/blas/daxpy.f for more information.
*/
void saxpy_(int* N, float* ALPHA, float* X, int* INCX, float* Y, int* INCY);
void daxpy_(int* N, double* ALPHA, double* X, int* INCX, double* Y, int* INCY);
/*! DGEMM performs one of the matrix-matrix operations
*
* C := alpha*op( A )*op( B ) + beta*C,
@@ -38,38 +33,10 @@ extern "C"
* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
* See http://www.netlib.org/blas/dgemm.f for more information.
*/
void sgemm_(char* TRANSA, char* TRANSB, int* M, int* N, int* K, float* ALPHA, float* A,
int* LDA, float* B, int* LDB, float* BETA, float* C, int* LDC);
void dgemm_(char* TRANSA, char* TRANSB, int* M, int* N, int* K, double* ALPHA, double* A,
int* LDA, double* B, int* LDB, double* BETA, double* C, int* LDC);
/*! DGEMV performs one of the matrix-vector operations
*
* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
*
* where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
* See http://www.netlib.org/blas/dgemv.f for more information
*/
void sgemv_(char* TRANS, int* M, int* N, float* ALPHA, float* A, int* LDA, float* X, int* INCX,
float* BETA, float* Y, int* INCY);
void dgemv_(char* TRANS, int* M, int* N, double* ALPHA, double* A, int* LDA, double* X, int* INCX,
double* BETA, double* Y, int* INCY);
/*! DGER performs the rank 1 operation
*
* A := alpha*x*y' + A,
*
* where alpha is a scalar, x is an m element vector, y is an n element
* vector and A is an m by n matrix.
* See http://www.netlib.org/blas/dger.f for more information
*/
void sger_ (int* M, int * N, float* ALPHA, float* X, int* INCX, float* Y, int* INCY,
float* A, int* LDA);
void dger_ (int* M, int * N, double* ALPHA, double* X, int* INCX, double* Y, int* INCY,
double* A, int* LDA);
/*! DSCAL computes x := alpha * x where alpha is a scalar and x is an n-vector.
* See http://www.netlib.org/blas/dscal.f for more information.
*/
void sscal_(int* N, float* ALPHA, float* X, int* INCX);
void dscal_(int* N, double* ALPHA, double* X, int* INCX);
void sgemm_(const char* TRANSA, const char* TRANSB, const int* M, const int* N, const int* K, const float* ALPHA, const float* A,
const int* LDA, const float* B, const int* LDB, const float* BETA, float* C, const int* LDC);
void dgemm_(const char* TRANSA, const char* TRANSB, const int* M, const int* N, const int* K, const double* ALPHA, const double* A,
const int* LDA, const double* B, const int* LDB, const double* BETA, double* C, const int* LDC);
}

#endif
@@ -109,15 +109,15 @@ namespace DeviceWrapper{

// MIC functions

// Shorthand for the alloc_if/free_if modifier pairs that are pasted into the
// data clauses of the `#pragma offload` directives in this file:
//   ALLOC / PVFMM_ALLOC : alloc_if(1) free_if(0)  (used by alloc_device_mic)
//   FREE  / PVFMM_FREE  : alloc_if(0) free_if(1)  (used by free_device_mic)
//   REUSE / PVFMM_REUSE : alloc_if(0) free_if(0)  (used by the in/out transfers)
// NOTE(review): presumably "allocate and keep", "release", and "reuse the
// existing device buffer" respectively -- confirm against the Intel offload docs.
#define ALLOC alloc_if(1) free_if(0)
#define FREE alloc_if(0) free_if(1)
#define REUSE alloc_if(0) free_if(0)
#define PVFMM_ALLOC alloc_if(1) free_if(0)
#define PVFMM_FREE alloc_if(0) free_if(1)
#define PVFMM_REUSE alloc_if(0) free_if(0)

inline uintptr_t alloc_device_mic(char* dev_handle, size_t len){
assert(dev_handle!=NULL);
uintptr_t dev_ptr=(uintptr_t)NULL;
#ifdef __INTEL_OFFLOAD
#pragma offload target(mic:0) nocopy( dev_handle: length(len) ALLOC) out(dev_ptr)
#pragma offload target(mic:0) nocopy( dev_handle: length(len) PVFMM_ALLOC) out(dev_ptr)
#else
PVFMM_UNUSED(len);
#endif
@@ -127,7 +127,7 @@ namespace DeviceWrapper{

inline void free_device_mic(char* dev_handle, uintptr_t dev_ptr){
#ifdef __INTEL_OFFLOAD
#pragma offload target(mic:0) in( dev_handle: length(0) FREE)
#pragma offload target(mic:0) in( dev_handle: length(0) PVFMM_FREE)
{
assert(dev_ptr==(uintptr_t)dev_handle);
}
@@ -142,14 +142,14 @@ namespace DeviceWrapper{
int wait_lock_idx=MIC_Lock::curr_lock();
int lock_idx=MIC_Lock::get_lock();
if(dev_handle==host_ptr){
#pragma offload target(mic:0) in( dev_handle : length(len) REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
#pragma offload target(mic:0) in( dev_handle : length(len) PVFMM_REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
{
assert(dev_ptr==(uintptr_t)dev_handle);
MIC_Lock::wait_lock(wait_lock_idx);
MIC_Lock::release_lock(lock_idx);
}
}else{
#pragma offload target(mic:0) in(host_ptr [0:len] : into ( dev_handle[0:len]) REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
#pragma offload target(mic:0) in(host_ptr [0:len] : into ( dev_handle[0:len]) PVFMM_REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
{
assert(dev_ptr==(uintptr_t)dev_handle);
MIC_Lock::wait_lock(wait_lock_idx);
@@ -171,14 +171,14 @@ namespace DeviceWrapper{
int wait_lock_idx=MIC_Lock::curr_lock();
int lock_idx=MIC_Lock::get_lock();
if(dev_handle==host_ptr){
#pragma offload target(mic:0) out( dev_handle : length(len) REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
#pragma offload target(mic:0) out( dev_handle : length(len) PVFMM_REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
{
assert(dev_ptr==(uintptr_t)dev_handle);
MIC_Lock::wait_lock(wait_lock_idx);
MIC_Lock::release_lock(lock_idx);
}
}else{
#pragma offload target(mic:0) out( dev_handle[0:len] : into (host_ptr [0:len]) REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
#pragma offload target(mic:0) out( dev_handle[0:len] : into (host_ptr [0:len]) PVFMM_REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
{
assert(dev_ptr==(uintptr_t)dev_handle);
MIC_Lock::wait_lock(wait_lock_idx);
@@ -298,34 +298,34 @@ namespace DeviceWrapper{
// Implementation of MIC_Lock

// have_mic / PVFMM_have_mic evaluate to 1 when __MIC__ is defined and to 0
// otherwise. The MIC_Lock methods test this value and abort() when it is
// non-zero, i.e. when they are reached in a build where __MIC__ is set.
#ifdef __MIC__
#define have_mic 1
#define PVFMM_have_mic 1
#else
#define have_mic 0
#define PVFMM_have_mic 0
#endif

// Number of entries in the MIC_Lock lock table (lock_vec is resized to this
// value). The macro is #undef'd again after MIC_Lock::get_lock.
#define NUM_LOCKS 1000000
#define PVFMM_NUM_LOCKS 1000000
// Sets up the MIC lock table.
// With __INTEL_OFFLOAD defined: aborts if have_mic is non-zero, resets
// lock_idx to 0, resizes lock_vec to NUM_LOCKS entries and zeroes it, stores
// the result of lock_vec.AllocDevice(false) in lock_vec_, and finally writes 1
// into every entry of lock_vec and (inside an offload region) of lock_vec_.
// Without __INTEL_OFFLOAD the function body is empty.
// NOTE(review): this text is a rendered diff, so each statement that uses a
// renamed macro appears twice -- first with the old name, then with the
// PVFMM_-prefixed name. Only one of each pair belongs in the real file.
inline void MIC_Lock::init(){
#ifdef __INTEL_OFFLOAD
if(have_mic) abort();// Cannot be called from MIC.
if(PVFMM_have_mic) abort();// Cannot be called from MIC.

lock_idx=0;
lock_vec.Resize(NUM_LOCKS);
lock_vec.Resize(PVFMM_NUM_LOCKS);
lock_vec.SetZero();
// lock_vec_ is the array written inside the offload region below.
lock_vec_=lock_vec.AllocDevice(false);
// Mark every entry as 1: first in lock_vec, then in lock_vec_ under the offload pragma.
{for(size_t i=0;i<NUM_LOCKS;i++) lock_vec [i]=1;}
{for(size_t i=0;i<PVFMM_NUM_LOCKS;i++) lock_vec [i]=1;}
#pragma offload target(mic:0)
{for(size_t i=0;i<NUM_LOCKS;i++) lock_vec_[i]=1;}
{for(size_t i=0;i<PVFMM_NUM_LOCKS;i++) lock_vec_[i]=1;}
#endif
}

inline int MIC_Lock::get_lock(){
#ifdef __INTEL_OFFLOAD
if(have_mic) abort();// Cannot be called from MIC.
if(PVFMM_have_mic) abort();// Cannot be called from MIC.

int idx;
#pragma omp critical
{
if(lock_idx==NUM_LOCKS-1){
if(lock_idx==PVFMM_NUM_LOCKS-1){
int wait_lock_idx=-1;
wait_lock_idx=MIC_Lock::curr_lock();
MIC_Lock::wait_lock(wait_lock_idx);
@@ -335,18 +335,18 @@ namespace DeviceWrapper{
}
idx=lock_idx;
lock_idx++;
assert(lock_idx<NUM_LOCKS);
assert(lock_idx<PVFMM_NUM_LOCKS);
}
return idx;
#else
return -1;
#endif
}
#undef NUM_LOCKS
#undef PVFMM_NUM_LOCKS

inline int MIC_Lock::curr_lock(){
#ifdef __INTEL_OFFLOAD
if(have_mic) abort();// Cannot be called from MIC.
if(PVFMM_have_mic) abort();// Cannot be called from MIC.
return lock_idx-1;
#else
return -1;
@@ -88,7 +88,7 @@ namespace par{

};

#define HS_MPIDATATYPE(CTYPE, MPITYPE) \
#define PVFMM_HS_MPIDATATYPE(CTYPE, MPITYPE) \
template <> \
class Mpi_datatype<CTYPE> { \
public: \
@@ -103,23 +103,23 @@ namespace par{
} \
};

// One explicit specialization of Mpi_datatype<CTYPE> per built-in arithmetic
// type, generated by the HS_MPIDATATYPE / PVFMM_HS_MPIDATATYPE macro.
// NOTE(review): the macro body is not fully visible here; presumably each
// specialization exposes the MPI datatype constant given as the second
// argument -- confirm against the macro definition.
// NOTE(review): this text is a rendered diff; the first twelve lines use the
// old macro name and the next twelve the PVFMM_-prefixed name.
HS_MPIDATATYPE(short, MPI_SHORT)
HS_MPIDATATYPE(int, MPI_INT)
HS_MPIDATATYPE(long, MPI_LONG)
HS_MPIDATATYPE(unsigned short, MPI_UNSIGNED_SHORT)
HS_MPIDATATYPE(unsigned int, MPI_UNSIGNED)
HS_MPIDATATYPE(unsigned long, MPI_UNSIGNED_LONG)
HS_MPIDATATYPE(float, MPI_FLOAT)
HS_MPIDATATYPE(double, MPI_DOUBLE)
HS_MPIDATATYPE(long double, MPI_LONG_DOUBLE)
HS_MPIDATATYPE(long long, MPI_LONG_LONG_INT)
HS_MPIDATATYPE(char, MPI_CHAR)
HS_MPIDATATYPE(unsigned char, MPI_UNSIGNED_CHAR)
PVFMM_HS_MPIDATATYPE(short, MPI_SHORT)
PVFMM_HS_MPIDATATYPE(int, MPI_INT)
PVFMM_HS_MPIDATATYPE(long, MPI_LONG)
PVFMM_HS_MPIDATATYPE(unsigned short, MPI_UNSIGNED_SHORT)
PVFMM_HS_MPIDATATYPE(unsigned int, MPI_UNSIGNED)
PVFMM_HS_MPIDATATYPE(unsigned long, MPI_UNSIGNED_LONG)
PVFMM_HS_MPIDATATYPE(float, MPI_FLOAT)
PVFMM_HS_MPIDATATYPE(double, MPI_DOUBLE)
PVFMM_HS_MPIDATATYPE(long double, MPI_LONG_DOUBLE)
PVFMM_HS_MPIDATATYPE(long long, MPI_LONG_LONG_INT)
PVFMM_HS_MPIDATATYPE(char, MPI_CHAR)
PVFMM_HS_MPIDATATYPE(unsigned char, MPI_UNSIGNED_CHAR)

//PetscScalar is simply a typedef for double. Hence no need to explicitly
//define an mpi_datatype for it.

// The helper macro is only needed for the instantiations above, so it is
// removed again here.
#undef HS_MPIDATATYPE
#undef PVFMM_HS_MPIDATATYPE

template <typename T>
class Mpi_datatype<std::complex<T> > {
@@ -1950,7 +1950,7 @@ void FMM_Pts<FMMNode>::EvalList(SetupData<Real_t>& setup_data, bool device){
size_t b=((tid+1)*vec_cnt)/omp_p;

for(size_t i=a;i<b;i++){
const PERM_INT_T* perm=(PERM_INT_T*)(precomp_data[0]+input_perm[(interac_indx+i)*4+0]);
const PVFMM_PERM_INT_T* perm=(PVFMM_PERM_INT_T*)(precomp_data[0]+input_perm[(interac_indx+i)*4+0]);
const Real_t* scal=( Real_t*)(precomp_data[0]+input_perm[(interac_indx+i)*4+1]);
const Real_t* v_in =( Real_t*)( input_data[0]+input_perm[(interac_indx+i)*4+3]);
Real_t* v_out=( Real_t*)( buff_in +input_perm[(interac_indx+i)*4+2]);
@@ -2034,7 +2034,7 @@ void FMM_Pts<FMMNode>::EvalList(SetupData<Real_t>& setup_data, bool device){
if(tid<omp_p-1) while(b<vec_cnt && out_ptr==output_perm[(interac_indx+b)*4+3]) b++;
}
for(size_t i=a;i<b;i++){ // Compute permutations.
const PERM_INT_T* perm=(PERM_INT_T*)(precomp_data[0]+output_perm[(interac_indx+i)*4+0]);
const PVFMM_PERM_INT_T* perm=(PVFMM_PERM_INT_T*)(precomp_data[0]+output_perm[(interac_indx+i)*4+0]);
const Real_t* scal=( Real_t*)(precomp_data[0]+output_perm[(interac_indx+i)*4+1]);
const Real_t* v_in =( Real_t*)( buff_out +output_perm[(interac_indx+i)*4+2]);
Real_t* v_out=( Real_t*)( output_data[0]+output_perm[(interac_indx+i)*4+3]);
@@ -1,5 +1,5 @@
#ifndef _CUDA_FUNC_HPP_
#define _CUDA_FUNC_HPP_
#ifndef _PVFMM_CUDA_FUNC_HPP_
#define _PVFMM_CUDA_FUNC_HPP_

#ifdef __cplusplus
extern "C" {
@@ -33,4 +33,4 @@ template<> inline void out_perm_gpu<double>(char* precomp_data, double* output_d
out_perm_gpu_d(precomp_data, output_data, buff_out, output_perm, vec_cnt, M_dim1, stream);
}

#endif //_CUDA_FUNC_HPP_
#endif //_PVFMM_CUDA_FUNC_HPP_
Oops, something went wrong.

0 comments on commit 6cd67bd

Please sign in to comment.
You can’t perform that action at this time.