Skip to content

Commit

Permalink
Add option to set number of MP2 integration groups externally
Browse files Browse the repository at this point in the history
Allows testing of low-memory regime.
  • Loading branch information
Frederick Stein authored and fstein93 committed Jan 16, 2023
1 parent b673757 commit 6769890
Show file tree
Hide file tree
Showing 10 changed files with 406 additions and 89 deletions.
12 changes: 12 additions & 0 deletions src/input_cp2k_mp2.F
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,18 @@ SUBROUTINE create_ri_mp2(section)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

CALL keyword_create(keyword, __LOCATION__, name="NUMBER_INTEGRATION_GROUPS", &
description="Sets the number of integration groups of the communication scheme in RI-MP2."// &
"Integrals will be replicated such that each integration group has all integrals available. "// &
"Must be a divisor of the number of subgroups (see GROUP_SIZE keyword in the WF_CORRELATION "// &
"section. Smaller groups reduce the communication costs but increase the memory developments. "// &
"If the provided value is non-positive or not a divisor of the number of subgroups, "// &
"the number of integration groups is determined automatically (default).", &
usage="NUMBER_INTEGRATION_GROUPS 2", &
default_i_val=-1)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

CALL keyword_create( &
keyword, __LOCATION__, &
name="PRINT_DGEMM_INFO", &
Expand Down
186 changes: 98 additions & 88 deletions src/mp2_ri_gpw.F
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,7 @@ SUBROUTINE mp2_ri_get_integ_group_size(mp2_env, para_env, para_env_sub, gd_array
max_repl_group_size, &
min_integ_group_size
INTEGER(KIND=int_8) :: mem
LOGICAL :: calc_group_size
REAL(KIND=dp) :: factor, mem_base, mem_min, mem_per_blk, &
mem_per_repl, mem_per_repl_blk, &
mem_real
Expand All @@ -1341,97 +1342,106 @@ SUBROUTINE mp2_ri_get_integ_group_size(mp2_env, para_env, para_env_sub, gd_array

ngroup = para_env%num_pe/para_env_sub%num_pe

CALL m_memory(mem)
mem_real = (mem + 1024*1024 - 1)/(1024*1024)
CALL mp_min(mem_real, para_env%group)

mem_base = 0.0_dp
mem_per_blk = 0.0_dp
mem_per_repl = 0.0_dp
mem_per_repl_blk = 0.0_dp

! BIB_C_copy
mem_per_repl = mem_per_repl + MAXVAL(MAX(REAL(homo, KIND=dp)*maxsize(gd_array), REAL(dimen_RI, KIND=dp))* &
maxsize(gd_B_virtual))*8.0_dp/(1024**2)
! BIB_C
mem_per_repl = mem_per_repl + SUM(REAL(homo, KIND=dp)*maxsize(gd_B_virtual))*maxsize(gd_array)*8.0_dp/(1024**2)
! BIB_C_rec
mem_per_repl_blk = mem_per_repl_blk + REAL(MAXVAL(maxsize(gd_B_virtual)), KIND=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
! local_i_aL+local_j_aL
mem_per_blk = mem_per_blk + 2.0_dp*MAXVAL(maxsize(gd_B_virtual))*REAL(dimen_RI, KIND=dp)*8.0_dp/(1024**2)
! local_ab
mem_base = mem_base + MAXVAL(REAL(virtual, KIND=dp)*maxsize(gd_B_virtual))*8.0_dp/(1024**2)
! external_ab/external_i_aL
mem_base = mem_base + REAL(MAX(dimen_RI, MAXVAL(virtual)), KIND=dp)*MAXVAL(maxsize(gd_B_virtual))*8.0_dp/(1024**2)
calc_group_size = mp2_env%ri_mp2%number_integration_groups <= 0
IF (.NOT. calc_group_size) THEN
IF (MOD(ngroup, mp2_env%ri_mp2%number_integration_groups) /= 0) calc_group_size = .TRUE.
END IF

IF (calc_forces) THEN
! Gamma_P_ia
mem_per_repl = mem_per_repl + SUM(REAL(homo, KIND=dp)*maxsize(gd_array)* &
maxsize(gd_B_virtual))*8.0_dp/(1024**2)
! Y_i_aP+Y_j_aP
mem_per_blk = mem_per_blk + 2.0_dp*MAXVAL(maxsize(gd_B_virtual))*dimen_RI*8.0_dp/(1024**2)
! local_ba/t_ab
mem_base = mem_base + REAL(MAXVAL(maxsize(gd_B_virtual)), KIND=dp)*MAX(dimen_RI, MAXVAL(virtual))*8.0_dp/(1024**2)
! P_ij
mem_base = mem_base + SUM(REAL(homo, KIND=dp)*homo)*8.0_dp/(1024**2)
! P_ab
mem_base = mem_base + SUM(REAL(virtual, KIND=dp)*maxsize(gd_B_virtual))*8.0_dp/(1024**2)
! send_ab/send_i_aL
IF (calc_group_size) THEN
CALL m_memory(mem)
mem_real = (mem + 1024*1024 - 1)/(1024*1024)
CALL mp_min(mem_real, para_env%group)

mem_base = 0.0_dp
mem_per_blk = 0.0_dp
mem_per_repl = 0.0_dp
mem_per_repl_blk = 0.0_dp

! BIB_C_copy
mem_per_repl = mem_per_repl + MAXVAL(MAX(REAL(homo, KIND=dp)*maxsize(gd_array), REAL(dimen_RI, KIND=dp))* &
maxsize(gd_B_virtual))*8.0_dp/(1024**2)
! BIB_C
mem_per_repl = mem_per_repl + SUM(REAL(homo, KIND=dp)*maxsize(gd_B_virtual))*maxsize(gd_array)*8.0_dp/(1024**2)
! BIB_C_rec
mem_per_repl_blk = mem_per_repl_blk + REAL(MAXVAL(maxsize(gd_B_virtual)), KIND=dp)*maxsize(gd_array)*8.0_dp/(1024**2)
! local_i_aL+local_j_aL
mem_per_blk = mem_per_blk + 2.0_dp*MAXVAL(maxsize(gd_B_virtual))*REAL(dimen_RI, KIND=dp)*8.0_dp/(1024**2)
! local_ab
mem_base = mem_base + MAXVAL(REAL(virtual, KIND=dp)*maxsize(gd_B_virtual))*8.0_dp/(1024**2)
! external_ab/external_i_aL
mem_base = mem_base + REAL(MAX(dimen_RI, MAXVAL(virtual)), KIND=dp)*MAXVAL(maxsize(gd_B_virtual))*8.0_dp/(1024**2)
END IF

! This a first guess based on the assumption of optimal block sizes
block_size = MAX(1, MIN(FLOOR(SQRT(REAL(MINVAL(homo), KIND=dp))), FLOOR(MINVAL(homo)/SQRT(2.0_dp*ngroup))))
IF (mp2_env%ri_mp2%block_size > 0) block_size = mp2_env%ri_mp2%block_size

mem_min = mem_base + mem_per_repl + (mem_per_blk + mem_per_repl_blk)*block_size

IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F9.2,A4)') 'RI_INFO| Minimum available memory per MPI process:', &
mem_real, ' MiB'
IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F9.2,A4)') 'RI_INFO| Minimum required memory per MPI process:', &
mem_min, ' MiB'

! We use the following communication model
! Comm(replication)+Comm(collection of data for ij pair)+Comm(contraction)
! One can show that the costs of the contraction step are independent of the block size and the replication group size
! With gradients, the other two steps are carried out twice (Y_i_aP -> Gamma_i_aP, and dereplication)
! NL ... number of RI basis functions
! NR ... replication group size
! NG ... number of sub groups
! NB ... Block size
! o ... number of occupied orbitals
! Then, we have the communication costs (in multiples of the original BIb_C matrix)
! (NR/NG)+(1-(NR/NG))*(o/NB+NB-2)/NG = (NR/NG)*(1-(o/NB+NB-2)/NG)+(o/NB+NB-2)/NG
! and with gradients
! 2*(NR/NG)+2*(1-(NR/NG))*(o/NB+NB-2)/NG = (NR/NG)*(1-(o/NB+NB-2)/NG)+(o/NB+NB-2)/NG
! We are looking for the minimum of the communication volume,
! thus, if the prefactor of (NR/NG) is smaller than zero, use the largest possible replication group size.
! If the factor is larger than zero, set the replication group size to 1. (For small systems and a large number of subgroups)
! Replication group size = 1 implies that the integration group size equals the number of subgroups

integ_group_size = ngroup

! Multiply everything by homo*virtual to consider differences between spin channels in case of open-shell calculations
factor = REAL(SUM(homo*virtual), KIND=dp) &
- SUM((REAL(MAXVAL(homo), KIND=dp)/block_size + block_size - 2.0_dp)*homo*virtual)/ngroup
IF (SIZE(homo) == 2) factor = factor - 2.0_dp*PRODUCT(homo)/block_size/ngroup*SUM(homo*virtual)

IF (factor <= 0.0_dp) THEN
! Remove the fixed memory and divide by the memory per replication group size
max_repl_group_size = MIN(MAX(FLOOR((mem_real - mem_base - mem_per_blk*block_size)/ &
(mem_per_repl + mem_per_repl_blk*block_size)), 1), ngroup)
! Convert to an integration group size
min_integ_group_size = ngroup/max_repl_group_size

! Ensure that the integration group size is a divisor of the number of sub groups
DO iiB = MAX(MIN(min_integ_group_size, ngroup), 1), ngroup
! check that the ngroup is a multiple of integ_group_size
IF (MOD(ngroup, iiB) == 0) THEN
integ_group_size = iiB
EXIT
END IF
integ_group_size = integ_group_size + 1
END DO
IF (calc_forces) THEN
! Gamma_P_ia
mem_per_repl = mem_per_repl + SUM(REAL(homo, KIND=dp)*maxsize(gd_array)* &
maxsize(gd_B_virtual))*8.0_dp/(1024**2)
! Y_i_aP+Y_j_aP
mem_per_blk = mem_per_blk + 2.0_dp*MAXVAL(maxsize(gd_B_virtual))*dimen_RI*8.0_dp/(1024**2)
! local_ba/t_ab
mem_base = mem_base + REAL(MAXVAL(maxsize(gd_B_virtual)), KIND=dp)*MAX(dimen_RI, MAXVAL(virtual))*8.0_dp/(1024**2)
! P_ij
mem_base = mem_base + SUM(REAL(homo, KIND=dp)*homo)*8.0_dp/(1024**2)
! P_ab
mem_base = mem_base + SUM(REAL(virtual, KIND=dp)*maxsize(gd_B_virtual))*8.0_dp/(1024**2)
! send_ab/send_i_aL
mem_base = mem_base + REAL(MAX(dimen_RI, MAXVAL(virtual)), KIND=dp)*MAXVAL(maxsize(gd_B_virtual))*8.0_dp/(1024**2)
END IF

! This a first guess based on the assumption of optimal block sizes
block_size = MAX(1, MIN(FLOOR(SQRT(REAL(MINVAL(homo), KIND=dp))), FLOOR(MINVAL(homo)/SQRT(2.0_dp*ngroup))))
IF (mp2_env%ri_mp2%block_size > 0) block_size = mp2_env%ri_mp2%block_size

mem_min = mem_base + mem_per_repl + (mem_per_blk + mem_per_repl_blk)*block_size

IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F9.2,A4)') 'RI_INFO| Minimum available memory per MPI process:', &
mem_real, ' MiB'
IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F9.2,A4)') 'RI_INFO| Minimum required memory per MPI process:', &
mem_min, ' MiB'

! We use the following communication model
! Comm(replication)+Comm(collection of data for ij pair)+Comm(contraction)
! One can show that the costs of the contraction step are independent of the block size and the replication group size
! With gradients, the other two steps are carried out twice (Y_i_aP -> Gamma_i_aP, and dereplication)
! NL ... number of RI basis functions
! NR ... replication group size
! NG ... number of sub groups
! NB ... Block size
! o ... number of occupied orbitals
! Then, we have the communication costs (in multiples of the original BIb_C matrix)
! (NR/NG)+(1-(NR/NG))*(o/NB+NB-2)/NG = (NR/NG)*(1-(o/NB+NB-2)/NG)+(o/NB+NB-2)/NG
! and with gradients
! 2*(NR/NG)+2*(1-(NR/NG))*(o/NB+NB-2)/NG = (NR/NG)*(1-(o/NB+NB-2)/NG)+(o/NB+NB-2)/NG
! We are looking for the minimum of the communication volume,
! thus, if the prefactor of (NR/NG) is smaller than zero, use the largest possible replication group size.
! If the factor is larger than zero, set the replication group size to 1. (For small systems and a large number of subgroups)
! Replication group size = 1 implies that the integration group size equals the number of subgroups

integ_group_size = ngroup

! Multiply everything by homo*virtual to consider differences between spin channels in case of open-shell calculations
factor = REAL(SUM(homo*virtual), KIND=dp) &
- SUM((REAL(MAXVAL(homo), KIND=dp)/block_size + block_size - 2.0_dp)*homo*virtual)/ngroup
IF (SIZE(homo) == 2) factor = factor - 2.0_dp*PRODUCT(homo)/block_size/ngroup*SUM(homo*virtual)

IF (factor <= 0.0_dp) THEN
! Remove the fixed memory and divide by the memory per replication group size
max_repl_group_size = MIN(MAX(FLOOR((mem_real - mem_base - mem_per_blk*block_size)/ &
(mem_per_repl + mem_per_repl_blk*block_size)), 1), ngroup)
! Convert to an integration group size
min_integ_group_size = ngroup/max_repl_group_size

! Ensure that the integration group size is a divisor of the number of sub groups
DO iiB = MAX(MIN(min_integ_group_size, ngroup), 1), ngroup
! check that the ngroup is a multiple of integ_group_size
IF (MOD(ngroup, iiB) == 0) THEN
integ_group_size = iiB
EXIT
END IF
integ_group_size = integ_group_size + 1
END DO
END IF
ELSE ! We take the user provided group size
integ_group_size = ngroup/mp2_env%ri_mp2%number_integration_groups
END IF

IF (unit_nr > 0) THEN
Expand Down
1 change: 1 addition & 0 deletions src/mp2_setup.F
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ SUBROUTINE read_mp2_section(input, mp2_env)
mp2_env%method = ri_mp2_method_gpw
END IF
CALL section_vals_val_get(mp2_section, "RI_MP2%BLOCK_SIZE", i_val=mp2_env%ri_mp2%block_size)
CALL section_vals_val_get(mp2_section, "RI_MP2%NUMBER_INTEGRATION_GROUPS", i_val=mp2_env%ri_mp2%number_integration_groups)
CALL section_vals_val_get(mp2_section, "RI_MP2%PRINT_DGEMM_INFO", l_val=mp2_env%ri_mp2%print_dgemm_info)

CALL section_vals_val_get(mp2_section, "RI%ROW_BLOCK", i_val=mp2_env%block_size_row)
Expand Down
2 changes: 1 addition & 1 deletion src/mp2_types.F
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ MODULE mp2_types
END TYPE mp2_gpw_type

TYPE ri_mp2_type
INTEGER :: block_size
INTEGER :: block_size, number_integration_groups
LOGICAL :: print_dgemm_info
END TYPE

Expand Down
99 changes: 99 additions & 0 deletions tests/QS/regtest-mp2-grad-1/H2O_grad_gpw_single_group.inp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
&GLOBAL
PROJECT GRAD_H2O_gpw_single_group
PRINT_LEVEL LOW
RUN_TYPE GEO_OPT
&END GLOBAL
&MOTION
&GEO_OPT
MAX_ITER 1
&END
&END MOTION
&FORCE_EVAL
METHOD Quickstep
&DFT
BASIS_SET_FILE_NAME HFX_BASIS
POTENTIAL_FILE_NAME POTENTIAL
&MGRID
CUTOFF 150
REL_CUTOFF 30
&END MGRID
&QS
METHOD GPW
EPS_DEFAULT 1.0E-12
&END QS
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-6
MAX_SCF 100
&END SCF
&XC
&XC_FUNCTIONAL NONE
&END XC_FUNCTIONAL
&HF
FRACTION 1.0000000
&SCREENING
SCREEN_ON_INITIAL_P .FALSE.
EPS_SCHWARZ 1.0E-6
EPS_SCHWARZ_FORCES 1.0E-6
&END SCREENING
&INTERACTION_POTENTIAL
POTENTIAL_TYPE TRUNCATED
CUTOFF_RADIUS 1.5
T_C_G_DATA t_c_g.dat
&END
&END HF
&WF_CORRELATION
&RI_MP2
BLOCK_SIZE 1
NUMBER_INTEGRATION_GROUPS 1
&END
&CANONICAL_GRADIENTS
EPS_CANONICAL 0.0001
FREE_HFX_BUFFER .TRUE.
&CPHF
EPS_CONV 1.0E-4
MAX_ITER 10
&END
&END
&INTEGRALS
&WFC_GPW
CUTOFF 50
REL_CUTOFF 20
EPS_FILTER 1.0E-12
EPS_GRID 1.0E-8
&END WFC_GPW
&END INTEGRALS
MEMORY 1.00
NUMBER_PROC 1
&END
&END XC
&END DFT
&PRINT
&FORCES
&END
&END
&SUBSYS
&CELL
ABC [angstrom] 5.0 5.0 5.0
&END CELL
&KIND H
BASIS_SET DZVP-GTH
BASIS_SET RI_AUX RI_DZVP-GTH
POTENTIAL GTH-HF-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH
BASIS_SET RI_AUX RI_DZVP-GTH
POTENTIAL GTH-HF-q6
&END KIND
&COORD
O 0.000000 0.000000 -0.211000
H 0.000000 -0.844000 0.495000
H 0.000000 0.744000 0.495000
&END
&TOPOLOGY
&CENTER_COORDINATES
&END
&END TOPOLOGY
&END SUBSYS
&END FORCE_EVAL
1 change: 1 addition & 0 deletions tests/QS/regtest-mp2-grad-1/TEST_FILES
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
H2O_grad_mme.inp 11 7e-09 -16.766973026874989
H2O_grad_gpw.inp 11 7e-08 -16.990048927268898
H2O_grad_gpw_single_group.inp 11 7e-08 -16.990048927268898
HF_dipole.inp 11 7e-08 -24.660454660161022
H2_H2_no_freeHFX.inp 11 2e-13 -2.307710999246907
H2_MP2_debug.inp 11 1e-10 -1.146241031776471
Expand Down

0 comments on commit 6769890

Please sign in to comment.