Skip to content

Commit

Permalink
Disable omp simd directives for older Intel and GNU compiler versions
Browse files Browse the repository at this point in the history
  • Loading branch information
mkrack committed Mar 24, 2021
1 parent 0ed8c19 commit 22c62e2
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 23 deletions.
11 changes: 11 additions & 0 deletions src/grid/common/grid_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,22 @@
// Per-compiler selection of the loop pragmas used by the grid kernels.
//
//   GRID_PRAGMA_UNROLL(N)     - forwards N to the compiler's unroll pragma
//                               for the loop that follows.
//   GRID_PRAGMA_SIMD(OBJS, N) - emits "omp simd linear OBJS simdlen(N)" for
//                               the loop that follows. OBJS is a
//                               parenthesized list of variables, e.g.
//                               GRID_PRAGMA_SIMD((dst, src), 8).
//
// A macro expands to nothing where the directive is disabled: the simd
// directive for Intel compilers older than 19.00, and both directives for
// GCC older than 8.
//
// Clang also defines __GNUC__ for GCC compatibility (as 4 by default), so it
// must be excluded from the GCC branches explicitly. Otherwise it would be
// classified as an old GCC and silently lose both pragmas instead of taking
// the common-syntax branch at the end.
#if defined(__INTEL_COMPILER)
#define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(unroll(N)))
#if __INTEL_COMPILER >= 1900
#define GRID_PRAGMA_SIMD(OBJS, N) \
_Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
#else
#define GRID_PRAGMA_SIMD(OBJS, N)
#endif
// GCC introduced the unroll pragma with version 8 using a custom syntax.
#elif defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8
#define GRID_PRAGMA_SIMD(OBJS, N) \
_Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
#define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(GCC unroll N))
#elif defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
#define GRID_PRAGMA_SIMD(OBJS, N)
#define GRID_PRAGMA_UNROLL(N)
#else // Clang and most other compilers support a common syntax.
#define GRID_PRAGMA_SIMD(OBJS, N) \
_Pragma(GRID_STRINGIFY(omp simd linear OBJS simdlen(N)))
#define GRID_PRAGMA_UNROLL(N) _Pragma(GRID_STRINGIFY(unroll(N)))
#endif

Expand Down
18 changes: 12 additions & 6 deletions src/grid/cpu/grid_collocate_dgemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ void grid_fill_pol_dgemm(const bool transpose, const double dr,
double *__restrict__ poly = &idx2(pol, 1, 0);
double *__restrict__ src1 = &idx2(pol, 0, 0);
double *__restrict__ dst = &idx2(pol, 2, 0);
#pragma omp simd linear(dst, src1, poly) simdlen(8)
//#pragma omp simd linear(dst, src1, poly) simdlen(8)
GRID_PRAGMA_SIMD((dst, src1, poly), 8)
for (int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++)
dst[ig] = src1[ig] * poly[ig] * poly[ig];
}
Expand All @@ -217,7 +218,8 @@ void grid_fill_pol_dgemm(const bool transpose, const double dr,
const double *__restrict__ poly = &idx2(pol, 1, 0);
const double *__restrict__ src1 = &idx2(pol, icoef - 1, 0);
double *__restrict__ dst = &idx2(pol, icoef, 0);
#pragma omp simd linear(dst, src1, poly) simdlen(8)
//#pragma omp simd linear(dst, src1, poly) simdlen(8)
GRID_PRAGMA_SIMD((dst, src1, poly), 8)
for (int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++) {
dst[ig] = src1[ig] * poly[ig];
}
Expand All @@ -227,7 +229,8 @@ void grid_fill_pol_dgemm(const bool transpose, const double dr,
if (lp > 0) {
double *__restrict__ dst = &idx2(pol, 1, 0);
const double *__restrict__ src = &idx2(pol, 0, 0);
#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++) {
dst[ig] *= src[ig];
}
Expand Down Expand Up @@ -304,7 +307,8 @@ void apply_sphere_cutoff_ortho(struct collocation_integration_ *const handler,
position1[1], position1[2]);

const int sizex = upper_corner[2] - lower_corner[2];
#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x = 0; x < sizex; x++) {
dst[x] += src[x];
}
Expand Down Expand Up @@ -425,7 +429,8 @@ void apply_spherical_cutoff_generic(
&idx3(handler->cube, position1[0], position1[1], position1[2]);

const int sizex = upper_corner[2] - lower_corner[2];
#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x = 0; x < sizex; x++) {
dst[x] += src[x];
}
Expand Down Expand Up @@ -461,7 +466,8 @@ void collocate_l0(double *scratch, const double alpha, const bool orthogonal_xy,
for (int y = 0; y < cube->size[1]; y++) {
const double *__restrict src = &idx2(exp_xy[0], y, 0);
double *__restrict dst = &scratch[y * cube->ld_];
#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= src[x];
}
Expand Down
6 changes: 4 additions & 2 deletions src/grid/cpu/grid_integrate_dgemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ void extract_cube_within_spherical_cutoff_ortho(

const int sizex = upper_corner[2] - lower_corner[2];

#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x = 0; x < sizex; x++) {
dst[x] = src[x];
}
Expand Down Expand Up @@ -224,7 +225,8 @@ void extract_cube_within_spherical_cutoff_generic(

const int sizex = upper_corner[2] - lower_corner[2];

#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x = 0; x < sizex; x++) {
dst[x] = src[x];
}
Expand Down
36 changes: 24 additions & 12 deletions src/grid/cpu/non_orthorombic_corrections.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ void calculate_non_orthorombic_corrections_tensor_blocked(
double *restrict dst = &idx2(exp_blocked, y2, 0);
const double scal = x1[y_1 + y2] * c_exp_const;
const double *restrict src = &x2[x_1];
#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x3 = 0; x3 < block_size[d2]; x3++) {
dst[x3] = scal * src[x3];
}
Expand Down Expand Up @@ -267,7 +268,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
const double *restrict yx = &idx3(Exp[0], 2, y, 0);
double *restrict dst = &idx3(cube[0], z, y, 0);

#pragma omp simd linear(dst, yx) simdlen(8)
//#pragma omp simd linear(dst, yx) simdlen(8)
GRID_PRAGMA_SIMD((dst, yx), 8)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= yx[x];
}
Expand All @@ -283,7 +285,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
const double zy = idx3(Exp[0], 1, z, y);
double *restrict dst = &idx3(cube[0], z, y, 0);

#pragma omp simd linear(dst) simdlen(8)
//#pragma omp simd linear(dst) simdlen(8)
GRID_PRAGMA_SIMD((dst), 8)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= zy;
}
Expand All @@ -298,7 +301,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
double *restrict zx = &idx3(Exp[0], 0, z, 0);
for (int y = 0; y < cube->size[1]; y++) {
double *restrict dst = &idx3(cube[0], z, y, 0);
#pragma omp simd linear(dst, zx) simdlen(8)
//#pragma omp simd linear(dst, zx) simdlen(8)
GRID_PRAGMA_SIMD((dst, zx), 8)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= zx[x];
}
Expand All @@ -315,7 +319,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
const double *restrict yx = &idx3(Exp[0], 2, y, 0);
double *restrict dst = &idx3(cube[0], z, y, 0);

#pragma omp simd linear(dst, yx) simdlen(8)
//#pragma omp simd linear(dst, yx) simdlen(8)
GRID_PRAGMA_SIMD((dst, yx), 8)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= zy * yx[x];
}
Expand All @@ -331,7 +336,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
for (int y = 0; y < cube->size[1]; y++) {
const double *restrict yx = &idx3(Exp[0], 2, y, 0);
double *restrict dst = &idx3(cube[0], z, y, 0);
#pragma omp simd linear(dst, yx) simdlen(8)
//#pragma omp simd linear(dst, yx) simdlen(8)
GRID_PRAGMA_SIMD((dst, yx), 8)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= zx[x] * yx[x];
}
Expand All @@ -348,7 +354,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
const double zy = idx3(Exp[0], 1, z, y);
double *restrict dst = &idx3(cube[0], z, y, 0);

#pragma omp simd linear(dst) simdlen(8)
//#pragma omp simd linear(dst) simdlen(8)
GRID_PRAGMA_SIMD((dst), 8)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= zx[x] * zy;
}
Expand All @@ -366,7 +373,8 @@ void apply_non_orthorombic_corrections(const bool *restrict plane,
const double *restrict yx = &idx3(Exp[0], 2, y, 0);
double *restrict dst = &idx3(cube[0], z, y, 0);

#pragma omp simd linear(dst, zx, yx) simdlen(8)
// All three pointers indexed by x (dst, zx, yx) are listed in the linear
// clause, matching the "linear(dst, zx, yx)" pragma this macro replaces.
GRID_PRAGMA_SIMD((dst, zx, yx), 8)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= zx[x] * zy * yx[x];
}
Expand All @@ -382,7 +390,8 @@ void apply_non_orthorombic_corrections_xy_blocked(
double *restrict dst = &idx3(m[0], gamma, y1, 0);
const double *restrict src = &idx2(Exp[0], y1, 0);

#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x1 = 0; x1 < m->size[2]; x1++) {
dst[x1] *= src[x1];
}
Expand All @@ -396,7 +405,8 @@ void apply_non_orthorombic_corrections_xz_blocked(
const double *restrict src = &idx2(Exp[0], z1, 0);
for (int y1 = 0; y1 < m->size[1]; y1++) {
double *restrict dst = &idx3(m[0], z1, y1, 0);
#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x1 = 0; x1 < m->size[2]; x1++) {
dst[x1] *= src[x1];
}
Expand All @@ -410,7 +420,8 @@ void apply_non_orthorombic_corrections_yz_blocked(
for (int y1 = 0; y1 < m->size[1]; y1++) {
const double src = idx2(Exp[0], z1, y1);
double *restrict dst = &idx3(m[0], z1, y1, 0);
#pragma omp simd linear(dst) simdlen(8)
//#pragma omp simd linear(dst) simdlen(8)
GRID_PRAGMA_SIMD((dst), 8)
for (int x1 = 0; x1 < m->size[2]; x1++) {
dst[x1] *= src;
}
Expand All @@ -426,7 +437,8 @@ void apply_non_orthorombic_corrections_xz_yz_blocked(
for (int y1 = 0; y1 < m->size[1]; y1++) {
const double src = idx2(Exp_yz[0], z1, y1);
double *restrict dst = &idx3(m[0], z1, y1, 0);
#pragma omp simd linear(dst) simdlen(8)
//#pragma omp simd linear(dst) simdlen(8)
GRID_PRAGMA_SIMD((dst), 8)
for (int x1 = 0; x1 < m->size[2]; x1++) {
dst[x1] *= src * src_xz[x1];
}
Expand Down
9 changes: 6 additions & 3 deletions src/grid/cpu/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ void extract_sub_grid(const int *lower_corner, const int *upper_corner,
#ifdef __LIBXSMM
LIBXSMM_PRAGMA_SIMD
#else
#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
#endif
for (int x = 0; x < sizex; x++) {
dst[x] = src[x];
Expand Down Expand Up @@ -238,7 +239,8 @@ void add_sub_grid(const int *lower_corner, const int *upper_corner,
#ifdef __LIBXSMM
LIBXSMM_PRAGMA_SIMD
#else
#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
#endif
for (int x = 0; x < sizex; x++) {
dst[x] += src[x];
Expand All @@ -248,7 +250,8 @@ void add_sub_grid(const int *lower_corner, const int *upper_corner,
src += subgrid->ld_;
}

#pragma omp simd linear(dst, src) simdlen(8)
//#pragma omp simd linear(dst, src) simdlen(8)
GRID_PRAGMA_SIMD((dst, src), 8)
for (int x = 0; x < sizex; x++) {
dst[x] += src[x];
}
Expand Down

0 comments on commit 22c62e2

Please sign in to comment.