Skip to content
Permalink
Browse files

fm: override auto-ncol_block for ELPA redist

fixes #578
  • Loading branch information
dev-zero committed Nov 29, 2019
1 parent 5267fe0 commit e0a41d874b244fc5d4961d6406f280471c636675
Showing with 17 additions and 6 deletions.
  1. +9 −2 src/fm/cp_fm_diag_utils.F
  2. +8 −4 src/fm/cp_fm_struct.F
@@ -297,7 +297,7 @@ SUBROUTINE cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvecto
#if defined(__SCALAPACK)
REAL(KIND=dp) :: fake_local_data(1, 1)
INTEGER :: fake_descriptor(9), mepos_old, &
io_unit, ngroups
io_unit, ngroups, ncol_block
TYPE(cp_fm_struct_type), POINTER :: fm_struct_new
TYPE(cp_para_env_type), POINTER :: para_env
TYPE(cp_logger_type), POINTER :: logger
@@ -327,6 +327,7 @@ SUBROUTINE cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvecto
! we take a multiple of 4, and approximately n/60
para_env => matrix%matrix_struct%para_env
mepos_old = para_env%mepos
ncol_block = -1 ! normally we also want to adjust the block size according to the optimal # of CPUs

rdinfo%matrix_order = matrix%matrix_struct%nrow_global
rdinfo%num_pe_old = para_env%num_pe
@@ -352,6 +353,11 @@ SUBROUTINE cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvecto
ELSE IF (rdinfo%num_pe_old > rdinfo%num_pe_max_nz_col) THEN
! Otherwise, only redistribute if we have to
rdinfo%num_pe_new = rdinfo%num_pe_max_nz_col
! do NOT let cp_fm_struct_create automatically adjust the block size because the
! calculated number of processors such that no block has 0 columns wouldn't match (see #578):
! if the automatically chosen block size is larger than the present one we would still end
! up with empty processors
CALL cp_fm_get_info(matrix, ncol_block=ncol_block)
END IF
END IF
@@ -392,7 +398,8 @@ SUBROUTINE cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvecto
CALL cp_fm_struct_create(fmstruct=fm_struct_new, &
para_env=work_redistribute%para_env_new, &
context=work_redistribute%blacs_env_new, &
nrow_global=rdinfo%matrix_order, ncol_global=rdinfo%matrix_order)
nrow_global=rdinfo%matrix_order, ncol_global=rdinfo%matrix_order, &
ncol_block=ncol_block)
CALL cp_fm_create(matrix_new, matrix_struct=fm_struct_new, name="yevd_new_mat")
CALL cp_fm_create(eigenvectors_new, matrix_struct=fm_struct_new, name="yevd_new_vec")
CALL cp_fm_struct_release(fm_struct_new)
@@ -106,8 +106,10 @@ MODULE cp_fm_struct
!> \param context the blacs context of this matrix
!> \param nrow_global the number of rows of the full matrix
!> \param ncol_global the number of columns of the full matrix
!> \param nrow_block the number of rows of a block of the matrix ! useful defaults are in, do not specify
!> \param ncol_block the number of columns of a block of the matrix ! useful defaults are in, do not specify
!> \param nrow_block the number of rows of a block of the matrix,
!> omit or set to -1 to use the built-in defaults
!> \param ncol_block the number of columns of a block of the matrix,
!> omit or set to -1 to use the built-in defaults
!> \param descriptor the scalapack descriptor of the matrix (if not given
!> a new one is allocated)
!> \param first_p_pos ...
@@ -196,15 +198,17 @@ SUBROUTINE cp_fm_struct_create(fmstruct, para_env, context, nrow_global, &

! try to avoid small left-over blocks (anyway naive)
IF (PRESENT(nrow_block)) THEN
fmstruct%nrow_block = nrow_block
IF (nrow_block > 0) & ! allows passing a block size of -1 to explicitly request the automatic default
fmstruct%nrow_block = nrow_block
END IF
IF (.NOT. my_force_block) THEN
dumblock = CEILING(REAL(fmstruct%nrow_global, KIND=dp)/ &
REAL(fmstruct%context%num_pe(1), KIND=dp))
fmstruct%nrow_block = MAX(1, MIN(fmstruct%nrow_block, dumblock))
END IF
IF (PRESENT(ncol_block)) THEN
fmstruct%ncol_block = ncol_block
IF (ncol_block > 0) & ! allows passing a block size of -1 to explicitly request the automatic default
fmstruct%ncol_block = ncol_block
END IF
IF (.NOT. my_force_block) THEN
dumblock = CEILING(REAL(fmstruct%ncol_global, KIND=dp)/ &

0 comments on commit e0a41d8

Please sign in to comment.
You can’t perform that action at this time.