Skip to content

Commit

Permalink
XAS_TDP| Improved MPI parallelization in xc-kernel integration
Browse files Browse the repository at this point in the history
  • Loading branch information
abussy committed Mar 30, 2020
1 parent cb0bfe3 commit 398120c
Show file tree
Hide file tree
Showing 8 changed files with 524 additions and 452 deletions.
13 changes: 1 addition & 12 deletions src/input_cp2k_dft.F
Original file line number Diff line number Diff line change
Expand Up @@ -8271,21 +8271,10 @@ SUBROUTINE create_xas_tdp_section(section)
! The KERNEL subsection
CALL section_create(subsection, __LOCATION__, name="KERNEL", &
description="Defines how the kernel is built in terms of functionals.", &
n_keywords=2, &
n_keywords=1, &
n_subsections=1, &
repeats=.FALSE.)

CALL keyword_create(keyword, __LOCATION__, name="NPROCS_GRID", &
variants=s2a("BATCH_SIZE", "NPROCS_PER_GRID"), &
description="The MPI processes are split into batches of size NPROCS_GRID "// &
"for the parallel treatment of the XC kernel. The excited "// &
"atoms are distributed over the batches and their integration "// &
"grids are split over the procs of the corresponding batch.", &
usage="NPROCS_GRID {integer}", &
default_i_val=1)
CALL section_add_keyword(subsection, keyword)
CALL keyword_release(keyword)

CALL keyword_create(keyword, __LOCATION__, name="RI_REGION", &
variants=(/"RI_RADIUS"/), &
description="The region defined by a sphere of the given radius around "// &
Expand Down
844 changes: 428 additions & 416 deletions src/xas_tdp_atom.F

Large diffs are not rendered by default.

18 changes: 8 additions & 10 deletions src/xas_tdp_kernel.F
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@ MODULE xas_tdp_kernel
dbcsr_type_symmetric
USE distribution_2d_types, ONLY: distribution_2d_type
USE kinds, ONLY: dp
USE message_passing, ONLY: mp_bcast,&
mp_irecv,&
mp_isend,&
mp_waitall
USE message_passing, ONLY: mp_bcast
USE qs_environment_types, ONLY: get_qs_env,&
qs_environment_type
USE qs_kind_types, ONLY: get_qs_kind,&
Expand All @@ -40,6 +37,7 @@ MODULE xas_tdp_kernel
o3c_vec_release, o3c_vec_type
USE util, ONLY: get_limit
USE xas_tdp_types, ONLY: donor_state_type,&
get_proc_batch_sizes,&
xas_tdp_control_type,&
xas_tdp_env_type

Expand Down Expand Up @@ -87,9 +85,10 @@ SUBROUTINE kernel_coulomb_xc(coul_ker, xc_ker, donor_state, xas_tdp_env, xas_tdp
CHARACTER(len=*), PARAMETER :: routineN = 'kernel_coulomb_xc', &
routineP = moduleN//':'//routineN

INTEGER :: bo(2), handle, i, ibatch, iex, lb, &
natom, nbatch, ndo_mo, ndo_so, &
nex_atom, nsgfp, ri_atom, source, ub
INTEGER :: batch_size, bo(2), handle, i, ibatch, &
iex, lb, natom, nbatch, ndo_mo, &
ndo_so, nex_atom, nsgfp, ri_atom, &
source, ub
INTEGER, DIMENSION(:), POINTER :: blk_size
LOGICAL :: do_coulomb, do_sc, do_sf, do_sg, do_tp, &
do_xc, found
Expand Down Expand Up @@ -143,17 +142,16 @@ SUBROUTINE kernel_coulomb_xc(coul_ker, xc_ker, donor_state, xas_tdp_env, xas_tdp
nsgfp = SIZE(PQ, 1)
CALL get_qs_env(qs_env, para_env=para_env)
found = .FALSE.
nbatch = para_env%num_pe/xas_tdp_control%batch_size
IF (nbatch*xas_tdp_control%batch_size .NE. para_env%num_pe) nbatch = nbatch + 1
nex_atom = SIZE(xas_tdp_env%ex_atom_indices)
CALL get_proc_batch_sizes(batch_size, nbatch, nex_atom, para_env%num_pe)

DO ibatch = 0, nbatch - 1

bo = get_limit(nex_atom, nbatch, ibatch)
DO iex = bo(1), bo(2)

IF (xas_tdp_env%ex_atom_indices(iex) == ri_atom) THEN
source = ibatch*xas_tdp_control%batch_size !fxc is on all procs of the batch,
source = ibatch*batch_size
found = .TRUE. !but simply take the first
EXIT
END IF
Expand Down
4 changes: 0 additions & 4 deletions src/xas_tdp_methods.F
Original file line number Diff line number Diff line change
Expand Up @@ -984,10 +984,6 @@ SUBROUTINE print_info(ou, xas_tdp_control, qs_env)
WRITE (UNIT=ou, FMT="(/,T3,A,F5.2,A)") &
"XAS_TDP| RI Region's Radius: ", xas_tdp_control%ri_radius*angstrom, " Ang"

!parallelization scheme
WRITE (UNIT=ou, FMT="(T3,A,I4,A)") &
"XAS_TDP| Excited atoms distributed on batches of ", xas_tdp_control%batch_size, " procs"

CALL libxc_version_info(tmpStr)
WRITE (UNIT=ou, FMT="(T3,A,A,/,T3,A,/)") &
"XAS_TDP| XC Kernel Functional(s) from LIBXC Vers. ", TRIM(tmpStr(1:5)), &
Expand Down
95 changes: 87 additions & 8 deletions src/xas_tdp_types.F
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@
MODULE xas_tdp_types
USE cp_array_utils, ONLY: cp_1d_i_p_type,&
cp_1d_r_p_type,&
cp_2d_i_p_type,&
cp_2d_r_p_type,&
cp_3d_r_p_type
USE cp_files, ONLY: file_exists
USE cp_fm_types, ONLY: cp_fm_release,&
cp_fm_type
USE cp_para_env, ONLY: cp_para_env_release
USE cp_para_types, ONLY: cp_para_env_type
USE dbcsr_api, ONLY: dbcsr_distribution_release,&
dbcsr_distribution_type,&
dbcsr_p_type,&
Expand Down Expand Up @@ -88,14 +91,12 @@ MODULE xas_tdp_types
!> the number of rows is the number of times the keyword is repeated
!> \param grid_info the information about the atomic grids used for the xc kernel integrals
!> \param is_periodic self-explanatory
!> \param batch_size how many procs are allocated to the XC treatment of excited atoms
! **************************************************************************************************
TYPE xas_tdp_control_type
INTEGER :: define_excited
INTEGER :: dipole_form
INTEGER :: n_search
INTEGER :: n_excited
INTEGER :: batch_size
REAL(dp) :: e_range
REAL(dp) :: sx
REAL(dp) :: eps_range
Expand Down Expand Up @@ -265,7 +266,7 @@ MODULE xas_tdp_types

END TYPE donor_state_type

! Some helper types
! Some helper types for xas_tdp_atom
TYPE grid_atom_p_type
TYPE(grid_atom_type), POINTER :: grid_atom
END TYPE grid_atom_p_type
Expand All @@ -274,6 +275,17 @@ MODULE xas_tdp_types
TYPE(harmonics_atom_type), POINTER :: harmonics_atom
END TYPE harmonics_atom_p_type

!Bookkeeping attached to one batch of MPI processes. Pointer components are default-initialized
!to NULL() so that the ASSOCIATED() queries in release_batch_info are well defined even when an
!instance was never (or only partially) filled. Component meanings that cannot be confirmed from
!this module alone are marked as assumptions.
TYPE batch_info_type
   TYPE(cp_para_env_type), POINTER :: para_env => NULL() !handed to cp_para_env_release in release_batch_info
   INTEGER :: batch_size !presumably the number of procs per batch, cf. get_proc_batch_sizes - TODO confirm
   INTEGER :: nbatch !presumably the total number of batches, cf. get_proc_batch_sizes - TODO confirm
   INTEGER :: ibatch !presumably the index of the batch this process belongs to - TODO confirm
   INTEGER :: ipe !presumably the rank of this process within its batch - TODO confirm
   INTEGER, DIMENSION(:), ALLOCATABLE :: nso_proc !NOTE(review): looks like a per-process count - verify in xas_tdp_atom
   INTEGER, DIMENSION(:, :), ALLOCATABLE :: so_bo !NOTE(review): looks like per-process index bounds - verify in xas_tdp_atom
   TYPE(cp_2d_i_p_type), POINTER, DIMENSION(:) :: so_proc_info => NULL() !each %array entry is freed in release_batch_info
END TYPE batch_info_type

! **************************************************************************************************
!> \brief a environment type that contains all the info needed for XAS_TDP atomic grid calculations
!> \param ri_radius defines the neighbors in the RI projection of the density
Expand Down Expand Up @@ -312,13 +324,15 @@ MODULE xas_tdp_types
CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_types'

! *** Public data types ***
PUBLIC :: xas_tdp_env_type, donor_state_type, xas_tdp_control_type, xas_atom_env_type
PUBLIC :: xas_tdp_env_type, donor_state_type, xas_tdp_control_type, xas_atom_env_type, &
batch_info_type

! *** Public subroutines ***
PUBLIC :: set_donor_state, free_ds_memory, &
PUBLIC :: set_donor_state, free_ds_memory, release_batch_info, &
xas_tdp_env_create, xas_tdp_env_release, set_xas_tdp_env, &
xas_tdp_control_create, xas_tdp_control_release, read_xas_tdp_control, &
xas_atom_env_create, xas_atom_env_release, donor_state_create, free_exat_memory
xas_atom_env_create, xas_atom_env_release, donor_state_create, free_exat_memory, &
get_proc_batch_sizes

CONTAINS

Expand Down Expand Up @@ -545,8 +559,6 @@ SUBROUTINE read_xas_tdp_control(xas_tdp_control, xas_tdp_section)
CALL section_vals_val_get(xas_tdp_section, "KERNEL%EXACT_EXCHANGE%_SECTION_PARAMETERS_", &
l_val=xas_tdp_control%do_hfx)

CALL section_vals_val_get(xas_tdp_section, "KERNEL%NPROCS_GRID", i_val=xas_tdp_control%batch_size)

CALL section_vals_val_get(xas_tdp_section, "KERNEL%RI_REGION", r_val=xas_tdp_control%ri_radius)
xas_tdp_control%ri_radius = bohr*xas_tdp_control%ri_radius

Expand Down Expand Up @@ -1272,4 +1284,71 @@ SUBROUTINE free_exat_memory(xas_tdp_env, atom)
END SUBROUTINE free_exat_memory
! **************************************************************************************************
!> \brief Releases a batch_info type
!> \param batch_info ...
! **************************************************************************************************
SUBROUTINE release_batch_info(batch_info)
   TYPE(batch_info_type) :: batch_info
   INTEGER :: iproc

   !Give the parallel environment of the batch back through its dedicated release routine
   CALL cp_para_env_release(batch_info%para_env)

   !Without an associated table of per-entry arrays there is nothing left to free
   IF (.NOT. ASSOCIATED(batch_info%so_proc_info)) RETURN

   !Free every entry that owns an array first, then the table holding the entries
   DO iproc = 1, SIZE(batch_info%so_proc_info)
      IF (ASSOCIATED(batch_info%so_proc_info(iproc)%array)) &
         DEALLOCATE (batch_info%so_proc_info(iproc)%array)
   END DO
   DEALLOCATE (batch_info%so_proc_info)
END SUBROUTINE release_batch_info
! **************************************************************************************************
!> \brief Uses heuristics to determine a good batching of the processors for fxc integration
!> \param batch_size ...
!> \param nbatch ...
!> \param nex_atom ...
!> \param nprocs ...
!> \note It is here and not in xas_tdp_atom because of circular dependencies issues
! **************************************************************************************************
SUBROUTINE get_proc_batch_sizes(batch_size, nbatch, nex_atom, nprocs)
   !Splits nprocs processes into nbatch batches of batch_size processes each, based on the
   !number of excited atoms nex_atom.
   !  batch_size (out): number of processes in one batch
   !  nbatch     (out): number of batches
   !  nex_atom   (in) : number of excited atoms to spread over the batches
   !  nprocs     (in) : number of available processes
   INTEGER, INTENT(OUT) :: batch_size, nbatch
   INTEGER, INTENT(IN) :: nex_atom, nprocs
   !Largest batch size that is tried when there are at least as many atoms as processes
   INTEGER, PARAMETER :: max_batch_size = 4
   INTEGER :: rest, test_rest, test_size

   !Degenerate input (no excited atom or no process): the general formulas below would divide
   !by zero or leave batch_size undefined, so fall back to a single batch holding all processes
   IF (nex_atom < 1 .OR. nprocs < 1) THEN
      nbatch = 1
      batch_size = MAX(nprocs, 1)
      RETURN
   END IF

   !We have essentially 2 cases: nex_atom >= nprocs or nex_atom < nprocs
   IF (nex_atom >= nprocs) THEN
      !If nex_atom >= nprocs, we look for the batch size (from 1 up to max_batch_size) that yields
      !the best indicative load balance, i.e. the smallest remainder of excited atoms once they
      !are spread evenly over the batches. Ties keep the smallest batch size.
      rest = HUGE(rest) !any actual remainder is smaller, so the first candidate is always taken
      batch_size = 1
      DO test_size = 1, MIN(nprocs, max_batch_size)
         nbatch = nprocs/test_size
         test_rest = MODULO(nex_atom, nbatch)
         IF (test_rest < rest) THEN
            rest = test_rest
            batch_size = test_size
         END IF
      END DO
      nbatch = nprocs/batch_size
   ELSE
      !If nex_atom < nprocs, simply divide the processors into nex_atom batches
      nbatch = nex_atom
      batch_size = nprocs/nbatch
   END IF

   !Note: nbatch*batch_size can be smaller than nprocs because of the integer divisions. The
   !      leftover is at most batch_size - 1 (i.e. max_batch_size - 1) processes in the first
   !      case and MODULO(nprocs, nex_atom) processes in the second case; those processes are
   !      not covered by any batch.
END SUBROUTINE get_proc_batch_sizes
END MODULE xas_tdp_types
1 change: 0 additions & 1 deletion tests/QS/regtest-xastdp/C2H2-PBE-ri_region.inp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@

&KERNEL
RI_REGION 3.0
NPROCS_GRID 2
&XC_FUNCTIONAL
&LIBXC
FUNCTIONAL GGA_X_PBE
Expand Down
1 change: 0 additions & 1 deletion tests/QS/regtest-xastdp/CH3-PBE-uks.inp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@

&KERNEL
RI_RADIUS 3.0
NPROCS_GRID 2
&XC_FUNCTIONAL
&LIBXC
FUNCTIONAL GGA_X_PBE
Expand Down
Empty file.

0 comments on commit 398120c

Please sign in to comment.