Skip to content

Commit

Permalink
call dgemv_, etc... fortran functions instead of cblas equivalent
Browse files Browse the repository at this point in the history
  • Loading branch information
mtaillefumier authored and mkrack committed Mar 13, 2021
1 parent b7033f1 commit 4dfd2de
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 118 deletions.
4 changes: 3 additions & 1 deletion src/grid/cpu/grid_collocate_dgemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ void apply_spherical_cutoff_generic(
&idx3(handler->cube, position1[0], position1[1], position1[2]);

const int sizex = upper_corner[2] - lower_corner[2];
GRID_PRAGMA_UNROLL(4)
#pragma GCC ivdep
for (int x = 0; x < sizex; x++) {
dst[x] += src[x];
Expand Down Expand Up @@ -464,6 +465,7 @@ void collocate_l0(double *scratch, const double alpha, const bool orthogonal_xy,
const double *__restrict src = &idx2(exp_xy[0], y, 0);
double *__restrict dst = &scratch[y * cube->ld_];
#pragma GCC ivdep
GRID_PRAGMA_UNROLL(4)
for (int x = 0; x < cube->size[2]; x++) {
dst[x] *= src[x];
}
Expand Down Expand Up @@ -651,7 +653,7 @@ void apply_mapping_cubic(struct collocation_integration_ *handler,
int **map = handler->map;
map[1] = map[0] + 2 * cmax + 1;
map[2] = map[1] + 2 * cmax + 1;
// memset(map[0], 0xff, sizeof(int) * 3 * (2 * cmax + 1));

for (int i = 0; i < 3; i++) {
for (int ig = 0; ig < handler->cube.size[i]; ig++) {
map[i][ig] = modulo(cube_center[i] + ig + lower_boundaries_cube[i] -
Expand Down
142 changes: 26 additions & 116 deletions src/grid/cpu/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
#include <libxsmm.h>
#endif

#ifdef __MKL
#include <mkl.h>
#endif

#include "../common/grid_common.h"
#include "tensor_local.h"
#include "utils.h"
Expand Down Expand Up @@ -219,25 +223,6 @@ void add_sub_grid(const int *lower_corner, const int *upper_corner,
position1[1] = position[1];
position1[2] = position[2];
}
for (int d = 0; d < 3; d++) {
if ((lower_corner[d] < grid->window_shift[d]) || (lower_corner[d] < 0) ||
(lower_corner[d] >= upper_corner[d]) || (upper_corner[d] <= 0) ||
(upper_corner[d] - lower_corner[d] > subgrid->size[d]) ||
(grid == NULL) || (subgrid == NULL)) {
printf("Error : invalid parameters. Values of the given parameters along "
"the first wrong dimension\n");
printf(" : lorner corner [%d] = %d\n", d, lower_corner[d]);
printf(" : upper corner [%d] = %d\n", d, upper_corner[d]);
printf(" : diff [%d] = %d\n", d,
upper_corner[d] - lower_corner[d]);
printf(" : src grid size [%d] = %d\n", d, subgrid->size[d]);
printf(" : dst grid size [%d] = %d\n", d, grid->size[d]);
printf(" : window dst grid size [%d] = %d\n", d,
grid->window_size[d]);
printf(" : window dst shift [%d] = %d\n", d, grid->window_shift[d]);
abort();
}
}

const int sizex = upper_corner[2] - lower_corner[2];
const int sizey = upper_corner[1] - lower_corner[1];
Expand All @@ -253,6 +238,7 @@ void add_sub_grid(const int *lower_corner, const int *upper_corner,
#ifdef __LIBXSMM
LIBXSMM_PRAGMA_SIMD
#else
GRID_PRAGMA_UNROLL(4)
#pragma GCC ivdep
#endif
for (int x = 0; x < sizex; x++) {
Expand All @@ -263,6 +249,7 @@ void add_sub_grid(const int *lower_corner, const int *upper_corner,
src += subgrid->ld_;
}

GRID_PRAGMA_UNROLL(4)
#pragma GCC ivdep
for (int x = 0; x < sizex; x++) {
dst[x] += src[x];
Expand Down Expand Up @@ -402,6 +389,14 @@ void verify_orthogonality(const double dh[3][3], bool orthogonal[3]) {
}

#ifndef __MKL
extern void dger_(const int *M, const int *N, const double *alpha,
const double *X, const int *incX, const double *Y,
const int *incY, double *A, const int *lda);
extern void dgemv_(const char *Trans, const int *M, const int *N,
const double *alpha, const double *A, const int *lda,
const double *X, const int *incX, const double *beta,
double *Y, const int *incY);

void cblas_daxpy(const int N, const double alpha, const double *X,
const int incX, double *Y, const int incY) {
if ((incX == 1) && (incY == 1)) {
Expand Down Expand Up @@ -476,39 +471,9 @@ void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N,
const double alpha, const double *X, const int incX,
const double *Y, const int incY, double *A, const int lda) {
if (Layout == CblasRowMajor) {
for (int i = 0; i < M; i++) {
const double x = alpha * X[i + incX];
if (incY == 1) {
double *restrict dst = &A[i * lda];
const double *restrict y = Y;
for (int k = 0; k < N; k++) {
dst[k] += y[k] * x;
}
} else {
double *restrict dst = &A[i * lda];
const double *restrict y = Y;
for (int k = 0; k < N; k++) {
dst[k] += y[k + incY] * x;
}
}
}
dger_(&N, &M, &alpha, Y, &incY, X, &incX, A, &lda);
} else {
for (int j = 0; j < N; j++) {
const double y = alpha * Y[j + incY];
if (incX == 1) {
double *restrict dst = &A[j * lda];
const double *restrict x = X;
for (int k = 0; k < M; k++) {
dst[k] += x[k] * y;
}
} else {
double *restrict dst = &A[j * lda];
const double *restrict x = X;
for (int k = 0; k < M; k++) {
dst[k] += x[k + incX] * y;
}
}
}
dger_(&N, &M, &alpha, X, &incX, Y, &incY, A, &lda);
}
}

Expand All @@ -518,75 +483,20 @@ void cblas_dgemv(const CBLAS_LAYOUT order, const CBLAS_TRANSPOSE TransA,
const int M, const int N, const double alpha, const double *A,
const int lda, const double *X, const int incX,
const double beta, double *Y, const int incY) {
int i, j;
int lenX, lenY;
#define OFFSET(N, incX) ((incX) > 0 ? 0 : ((N)-1) * (-(incX)))

const int Trans = (TransA != CblasConjTrans) ? TransA : CblasTrans;

if (M == 0 || N == 0)
return;

if (alpha == 0.0 && beta == 1.0)
return;

if (Trans == CblasNoTrans) {
lenX = N;
lenY = M;
} else {
lenX = M;
lenY = N;
}

/* form y := beta*y */
if (beta == 0.0) {
int iy = OFFSET(lenY, incY);
for (i = 0; i < lenY; i++) {
Y[iy] = 0.0;
iy += incY;
}
} else if (beta != 1.0) {
int iy = OFFSET(lenY, incY);
for (i = 0; i < lenY; i++) {
Y[iy] *= beta;
iy += incY;
if (order == CblasColMajor) {
if (TransA == CblasTrans)
dgemv_("T", &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
else {
dgemv_("N", &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
}
}

if (alpha == 0.0)
return;

if ((order == CblasRowMajor && Trans == CblasNoTrans) ||
(order == CblasColMajor && Trans == CblasTrans)) {
/* form y := alpha*A*x + y */
int iy = OFFSET(lenY, incY);
for (i = 0; i < lenY; i++) {
double temp = 0.0;
int ix = OFFSET(lenX, incX);
for (j = 0; j < lenX; j++) {
temp += X[ix] * A[lda * i + j];
ix += incX;
}
Y[iy] += alpha * temp;
iy += incY;
}
} else if ((order == CblasRowMajor && Trans == CblasTrans) ||
(order == CblasColMajor && Trans == CblasNoTrans)) {
/* form y := alpha*A'*x + y */
int ix = OFFSET(lenX, incX);
for (j = 0; j < lenX; j++) {
const double temp = alpha * X[ix];
if (temp != 0.0) {
int iy = OFFSET(lenY, incY);
for (i = 0; i < lenY; i++) {
Y[iy] += temp * A[lda * j + i];
iy += incY;
}
}
ix += incX;
} else {
if (TransA == CblasTrans)
dgemv_("N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
else {
dgemv_("T", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
}
}
#undef OFFSET
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/grid/cpu/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ static inline void grid_free_scratch(void *ptr) {
#endif
}

/* even openblas has cblas versions of lapack and blas. */
/* even openblas and lapack has cblas versions of lapack and blas. */
#ifndef __MKL
enum CBLAS_LAYOUT { CblasRowMajor = 101, CblasColMajor = 102 };
enum CBLAS_TRANSPOSE {
Expand Down

0 comments on commit 4dfd2de

Please sign in to comment.