Disable omp simd directives for older Intel and GNU compiler versions

cp2k · Mar 24, 2021 · 22c62e2 · 22c62e2
1 parent 0ed8c19
commit 22c62e2
Show file tree

Hide file tree

Showing 5 changed files with 57 additions and 23 deletions.
diff --git a/src/grid/common/grid_common.h b/src/grid/common/grid_common.h
@@ -12,11 +12,22 @@
 // GCC introduced the unroll pragma with version 8 using a custom syntax.
 #if defined(__INTEL_COMPILER)
 #define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(unroll(N)))
+#if __INTEL_COMPILER >= 1900 // omp simd only for __INTEL_COMPILER >= 1900.
+#define GRID_PRAGMA_SIMD(OBJS, N)                                              \
+  _Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
+#else // Older Intel compilers: GRID_PRAGMA_SIMD expands to nothing.
+#define GRID_PRAGMA_SIMD(OBJS, N)
+#endif // __INTEL_COMPILER >= 1900
 #elif defined(__GNUC__) && __GNUC__ >= 8 // OBJS is a parenthesized list.
+#define GRID_PRAGMA_SIMD(OBJS, N)                                              \
+  _Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
 #define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(GCC unroll N))
 #elif defined(__GNUC__) && __GNUC__ < 8 // Both macros expand to nothing.
+#define GRID_PRAGMA_SIMD(OBJS, N)
 #define GRID_PRAGMA_UNROLL(N)
 #else // Most other compilers support a common syntax.
+#define GRID_PRAGMA_SIMD(OBJS, N)                                              \
+  _Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
 #define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(unroll(N)))
 #endif
 

diff --git a/src/grid/cpu/grid_collocate_dgemm.c b/src/grid/cpu/grid_collocate_dgemm.c
@@ -208,7 +208,8 @@ void grid_fill_pol_dgemm(const bool transpose, const double dr,
       double *__restrict__ poly = &idx2(pol, 1, 0);
       double *__restrict__ src1 = &idx2(pol, 0, 0);
       double *__restrict__ dst = &idx2(pol, 2, 0);
-#pragma omp simd linear(dst, src1, poly) simdlen(8)
+      //#pragma omp simd linear(dst, src1, poly) simdlen(8)
+      GRID_PRAGMA_SIMD((dst, src1, poly), 8)
       for (int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++)
         dst[ig] = src1[ig] * poly[ig] * poly[ig];
     }
@@ -217,7 +218,8 @@ void grid_fill_pol_dgemm(const bool transpose, const double dr,
       const double *__restrict__ poly = &idx2(pol, 1, 0);
       const double *__restrict__ src1 = &idx2(pol, icoef - 1, 0);
       double *__restrict__ dst = &idx2(pol, icoef, 0);
-#pragma omp simd linear(dst, src1, poly) simdlen(8)
+      //#pragma omp simd linear(dst, src1, poly) simdlen(8)
+      GRID_PRAGMA_SIMD((dst, src1, poly), 8)
       for (int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++) {
         dst[ig] = src1[ig] * poly[ig];
       }
@@ -227,7 +229,8 @@ void grid_fill_pol_dgemm(const bool transpose, const double dr,
     if (lp > 0) {
       double *__restrict__ dst = &idx2(pol, 1, 0);
       const double *__restrict__ src = &idx2(pol, 0, 0);
-#pragma omp simd linear(dst, src) simdlen(8)
+      //#pragma omp simd linear(dst, src) simdlen(8)
+      GRID_PRAGMA_SIMD((dst, src), 8)
       for (int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++) {
         dst[ig] *= src[ig];
       }
@@ -304,7 +307,8 @@ void apply_sphere_cutoff_ortho(struct collocation_integration_ *const handler,
                                            position1[1], position1[2]);
 
               const int sizex = upper_corner[2] - lower_corner[2];
-#pragma omp simd linear(dst, src) simdlen(8)
+              //#pragma omp simd linear(dst, src) simdlen(8)
+              GRID_PRAGMA_SIMD((dst, src), 8)
               for (int x = 0; x < sizex; x++) {
                 dst[x] += src[x];
               }
@@ -425,7 +429,8 @@ void apply_spherical_cutoff_generic(
               &idx3(handler->cube, position1[0], position1[1], position1[2]);
 
           const int sizex = upper_corner[2] - lower_corner[2];
-#pragma omp simd linear(dst, src) simdlen(8)
+          //#pragma omp simd linear(dst, src) simdlen(8)
+          GRID_PRAGMA_SIMD((dst, src), 8)
           for (int x = 0; x < sizex; x++) {
             dst[x] += src[x];
           }
@@ -461,7 +466,8 @@ void collocate_l0(double *scratch, const double alpha, const bool orthogonal_xy,
     for (int y = 0; y < cube->size[1]; y++) {
       const double *__restrict src = &idx2(exp_xy[0], y, 0);
       double *__restrict dst = &scratch[y * cube->ld_];
-#pragma omp simd linear(dst, src) simdlen(8)
+      //#pragma omp simd linear(dst, src) simdlen(8)
+      GRID_PRAGMA_SIMD((dst, src), 8)
       for (int x = 0; x < cube->size[2]; x++) {
         dst[x] *= src[x];
       }

diff --git a/src/grid/cpu/grid_integrate_dgemm.c b/src/grid/cpu/grid_integrate_dgemm.c
@@ -102,7 +102,8 @@ void extract_cube_within_spherical_cutoff_ortho(
 
               const int sizex = upper_corner[2] - lower_corner[2];
 
-#pragma omp simd linear(dst, src) simdlen(8)
+              //#pragma omp simd linear(dst, src) simdlen(8)
+              GRID_PRAGMA_SIMD((dst, src), 8)
               for (int x = 0; x < sizex; x++) {
                 dst[x] = src[x];
               }
@@ -224,7 +225,8 @@ void extract_cube_within_spherical_cutoff_generic(
 
           const int sizex = upper_corner[2] - lower_corner[2];
 
-#pragma omp simd linear(dst, src) simdlen(8)
+          //#pragma omp simd linear(dst, src) simdlen(8)
+          GRID_PRAGMA_SIMD((dst, src), 8)
           for (int x = 0; x < sizex; x++) {
             dst[x] = src[x];
           }

diff --git a/src/grid/cpu/non_orthorombic_corrections.c b/src/grid/cpu/non_orthorombic_corrections.c
@@ -232,7 +232,8 @@ void calculate_non_orthorombic_corrections_tensor_blocked(
             double *restrict dst = &idx2(exp_blocked, y2, 0);
             const double scal = x1[y_1 + y2] * c_exp_const;
             const double *restrict src = &x2[x_1];
-#pragma omp simd linear(dst, src) simdlen(8)
+            //#pragma omp simd linear(dst, src) simdlen(8)
+            GRID_PRAGMA_SIMD((dst, src), 8)
             for (int x3 = 0; x3 < block_size[d2]; x3++) {
               dst[x3] = scal * src[x3];
             }
@@ -267,7 +268,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
         const double *restrict yx = &idx3(Exp[0], 2, y, 0);
         double *restrict dst = &idx3(cube[0], z, y, 0);
 
-#pragma omp simd linear(dst, yx) simdlen(8)
+        //#pragma omp simd linear(dst, yx) simdlen(8)
+        GRID_PRAGMA_SIMD((dst, yx), 8)
         for (int x = 0; x < cube->size[2]; x++) {
           dst[x] *= yx[x];
         }
@@ -283,7 +285,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
         const double zy = idx3(Exp[0], 1, z, y);
         double *restrict dst = &idx3(cube[0], z, y, 0);
 
-#pragma omp simd linear(dst) simdlen(8)
+        //#pragma omp simd linear(dst) simdlen(8)
+        GRID_PRAGMA_SIMD((dst), 8)
         for (int x = 0; x < cube->size[2]; x++) {
           dst[x] *= zy;
         }
@@ -298,7 +301,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
       double *restrict zx = &idx3(Exp[0], 0, z, 0);
       for (int y = 0; y < cube->size[1]; y++) {
         double *restrict dst = &idx3(cube[0], z, y, 0);
-#pragma omp simd linear(dst, zx) simdlen(8)
+        //#pragma omp simd linear(dst, zx) simdlen(8)
+        GRID_PRAGMA_SIMD((dst, zx), 8)
         for (int x = 0; x < cube->size[2]; x++) {
           dst[x] *= zx[x];
         }
@@ -315,7 +319,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
         const double *restrict yx = &idx3(Exp[0], 2, y, 0);
         double *restrict dst = &idx3(cube[0], z, y, 0);
 
-#pragma omp simd linear(dst, yx) simdlen(8)
+        //#pragma omp simd linear(dst, yx) simdlen(8)
+        GRID_PRAGMA_SIMD((dst, yx), 8)
         for (int x = 0; x < cube->size[2]; x++) {
           dst[x] *= zy * yx[x];
         }
@@ -331,7 +336,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
       for (int y = 0; y < cube->size[1]; y++) {
         const double *restrict yx = &idx3(Exp[0], 2, y, 0);
         double *restrict dst = &idx3(cube[0], z, y, 0);
-#pragma omp simd linear(dst, yx) simdlen(8)
+        //#pragma omp simd linear(dst, yx) simdlen(8)
+        GRID_PRAGMA_SIMD((dst, yx), 8)
         for (int x = 0; x < cube->size[2]; x++) {
           dst[x] *= zx[x] * yx[x];
         }
@@ -348,7 +354,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
         const double zy = idx3(Exp[0], 1, z, y);
         double *restrict dst = &idx3(cube[0], z, y, 0);
 
-#pragma omp simd linear(dst) simdlen(8)
+        //#pragma omp simd linear(dst) simdlen(8)
+        GRID_PRAGMA_SIMD((dst), 8)
         for (int x = 0; x < cube->size[2]; x++) {
           dst[x] *= zx[x] * zy;
         }
@@ -366,7 +373,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
       const double *restrict yx = &idx3(Exp[0], 2, y, 0);
       double *restrict dst = &idx3(cube[0], z, y, 0);
 
-#pragma omp simd linear(dst, zx, yx) simdlen(8)
+      // Keep the clause list of the replaced pragma: linear(dst, zx, yx).
+      GRID_PRAGMA_SIMD((dst, zx, yx), 8)
       for (int x = 0; x < cube->size[2]; x++) {
         dst[x] *= zx[x] * zy * yx[x];
       }
@@ -382,7 +390,8 @@ void apply_non_orthorombic_corrections_xy_blocked(
       double *restrict dst = &idx3(m[0], gamma, y1, 0);
       const double *restrict src = &idx2(Exp[0], y1, 0);
 
-#pragma omp simd linear(dst, src) simdlen(8)
+      //#pragma omp simd linear(dst, src) simdlen(8)
+      GRID_PRAGMA_SIMD((dst, src), 8)
       for (int x1 = 0; x1 < m->size[2]; x1++) {
         dst[x1] *= src[x1];
       }
@@ -396,7 +405,8 @@ void apply_non_orthorombic_corrections_xz_blocked(
     const double *restrict src = &idx2(Exp[0], z1, 0);
     for (int y1 = 0; y1 < m->size[1]; y1++) {
       double *restrict dst = &idx3(m[0], z1, y1, 0);
-#pragma omp simd linear(dst, src) simdlen(8)
+      //#pragma omp simd linear(dst, src) simdlen(8)
+      GRID_PRAGMA_SIMD((dst, src), 8)
       for (int x1 = 0; x1 < m->size[2]; x1++) {
         dst[x1] *= src[x1];
       }
@@ -410,7 +420,8 @@ void apply_non_orthorombic_corrections_yz_blocked(
     for (int y1 = 0; y1 < m->size[1]; y1++) {
       const double src = idx2(Exp[0], z1, y1);
       double *restrict dst = &idx3(m[0], z1, y1, 0);
-#pragma omp simd linear(dst) simdlen(8)
+      //#pragma omp simd linear(dst) simdlen(8)
+      GRID_PRAGMA_SIMD((dst), 8)
       for (int x1 = 0; x1 < m->size[2]; x1++) {
         dst[x1] *= src;
       }
@@ -426,7 +437,8 @@ void apply_non_orthorombic_corrections_xz_yz_blocked(
     for (int y1 = 0; y1 < m->size[1]; y1++) {
       const double src = idx2(Exp_yz[0], z1, y1);
       double *restrict dst = &idx3(m[0], z1, y1, 0);
-#pragma omp simd linear(dst) simdlen(8)
+      //#pragma omp simd linear(dst) simdlen(8)
+      GRID_PRAGMA_SIMD((dst), 8)
       for (int x1 = 0; x1 < m->size[2]; x1++) {
         dst[x1] *= src * src_xz[x1];
       }

diff --git a/src/grid/cpu/utils.c b/src/grid/cpu/utils.c
@@ -203,7 +203,8 @@ void extract_sub_grid(const int *lower_corner, const int *upper_corner,
 #ifdef __LIBXSMM
       LIBXSMM_PRAGMA_SIMD
 #else
-#pragma omp simd linear(dst, src) simdlen(8)
+      //#pragma omp simd linear(dst, src) simdlen(8)
+      GRID_PRAGMA_SIMD((dst, src), 8)
 #endif
       for (int x = 0; x < sizex; x++) {
         dst[x] = src[x];
@@ -238,7 +239,8 @@ void add_sub_grid(const int *lower_corner, const int *upper_corner,
 #ifdef __LIBXSMM
       LIBXSMM_PRAGMA_SIMD
 #else
-#pragma omp simd linear(dst, src) simdlen(8)
+      //#pragma omp simd linear(dst, src) simdlen(8)
+      GRID_PRAGMA_SIMD((dst, src), 8)
 #endif
       for (int x = 0; x < sizex; x++) {
         dst[x] += src[x];
@@ -248,7 +250,8 @@ void add_sub_grid(const int *lower_corner, const int *upper_corner,
       src += subgrid->ld_;
     }
 
-#pragma omp simd linear(dst, src) simdlen(8)
+    //#pragma omp simd linear(dst, src) simdlen(8)
+    GRID_PRAGMA_SIMD((dst, src), 8)
     for (int x = 0; x < sizex; x++) {
       dst[x] += src[x];
     }