Skip to content

Commit

Permalink
grid: Fix unroll pragmas ignored by icc
Browse files Browse the repository at this point in the history
  • Loading branch information
oschuett committed Apr 22, 2021
1 parent 0cd1d52 commit 50d0dad
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 17 deletions.
34 changes: 19 additions & 15 deletions src/grid/common/grid_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,30 @@

// Turns the given tokens into a string literal, e.g. for use with _Pragma.
#define GRID_STRINGIFY(TOKENS) #TOKENS

// GCC introduced the unroll pragma with version 8 using a custom syntax.
#if defined(__INTEL_COMPILER)
#define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(unroll(N)))
#if __INTEL_COMPILER >= 1900
#define GRID_PRAGMA_SIMD(OBJS, N) \
_Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
#else
// GCC added the simd pragma with version 6.
#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && __GNUC__ < 6
#define GRID_PRAGMA_SIMD(OBJS, N)
#endif
#elif defined(__GNUC__) && __GNUC__ >= 8
#define GRID_PRAGMA_SIMD(OBJS, N) \
_Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
#define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(GCC unroll N))
#elif defined(__GNUC__) && __GNUC__ < 8
// Intel added the simd pragma with version 19.00.
#elif defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1900
#define GRID_PRAGMA_SIMD(OBJS, N)
#define GRID_PRAGMA_UNROLL(N)
#else // Most other compilers support a common syntax.
// All compilers support the same syntax defined by the OpenMP standard.
#else
#define GRID_PRAGMA_SIMD(OBJS, N) \
_Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
#endif

// Loop unrolling hints, to be placed directly in front of a loop.
//   GRID_PRAGMA_UNROLL(N):       requests unrolling with the factor N.
//   GRID_PRAGMA_UNROLL_UP_TO(N): requests unrolling with the factor N on GCC
//                                and leaves the factor to the compiler
//                                elsewhere.
//
// Clang also defines __GNUC__ (as 4 by default). It must therefore be
// excluded from the GCC branches, otherwise its unroll pragmas would be
// silently dropped by the "__GNUC__ < 8" case below.
//
// GCC added the unroll pragma with version 8 and...
#if defined(__GNUC__) && !defined(__clang__) &&                                \
    !defined(__INTEL_COMPILER) && __GNUC__ < 8
#define GRID_PRAGMA_UNROLL(N)
#define GRID_PRAGMA_UNROLL_UP_TO(N)
// ...chose a custom syntax.
#elif defined(__GNUC__) && !defined(__clang__) &&                              \
    !defined(__INTEL_COMPILER) && __GNUC__ >= 8
#define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(GCC unroll N))
#define GRID_PRAGMA_UNROLL_UP_TO(N) _Pragma(GRID_STRINGIFY(GCC unroll N))
// Most other compilers (including Clang and Intel) support a common syntax.
#else
#define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(unroll(N)))
#define GRID_PRAGMA_UNROLL_UP_TO(N) _Pragma("unroll")
#endif

#if defined(__CUDACC__)
Expand Down
4 changes: 2 additions & 2 deletions src/grid/ref/grid_ref_collint.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ ortho_cx_to_grid_avx2(const int lp, const int cmax, const int i,
__m256d r_vec_3 = _mm256_mul_pd(p_vec, _mm256_set1_pd(cx[3]));

// Remaining iterations for lxp > 0 use fused multiply adds.
GRID_PRAGMA_UNROLL(GRID_MAX_LP_OPTIMIZED)
GRID_PRAGMA_UNROLL_UP_TO(GRID_MAX_LP_OPTIMIZED)
for (int lxp = 1; lxp <= lp; lxp++) {
const double *cx_base = &cx[lxp * 4];
p_vec = _mm256_loadu_pd(&pol[0][lxp][icmax]);
Expand All @@ -121,7 +121,7 @@ ortho_cx_to_grid_avx2(const int lp, const int cmax, const int i,
__m256d grid_vec_2 = _mm256_loadu_pd(grid_2);
__m256d grid_vec_3 = _mm256_loadu_pd(grid_3);

GRID_PRAGMA_UNROLL(GRID_MAX_LP_OPTIMIZED + 1)
GRID_PRAGMA_UNROLL_UP_TO(GRID_MAX_LP_OPTIMIZED + 1)
for (int lxp = 0; lxp <= lp; lxp++) {
__m256d p_vec = _mm256_loadu_pd(&pol[0][lxp][icmax]);

Expand Down

0 comments on commit 50d0dad

Please sign in to comment.