Skip to content

Commit

Permalink
RI-HFX| memory management improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
abussy committed Dec 6, 2021
1 parent e33bbad commit 8d484e4
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 225 deletions.
330 changes: 155 additions & 175 deletions src/hfx_ri.F

Large diffs are not rendered by default.

36 changes: 7 additions & 29 deletions src/hfx_types.F
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ MODULE hfx_types
do_hfx_auto_shells, do_potential_coulomb, do_potential_gaussian, do_potential_id, &
do_potential_long, do_potential_mix_cl, do_potential_mix_cl_trunc, do_potential_mix_lg, &
do_potential_short, do_potential_truncated, hfx_ri_do_2c_diag, hfx_ri_do_2c_iter
USE input_cp2k_hfx, ONLY: ri_dense,&
ri_min_block,&
ri_mo,&
USE input_cp2k_hfx, ONLY: ri_mo,&
ri_pmat
USE input_section_types, ONLY: section_vals_get,&
section_vals_get_subs_vals,&
Expand Down Expand Up @@ -374,7 +372,7 @@ MODULE hfx_types
REAL(KIND=dp) :: filter_eps, filter_eps_2c, filter_eps_storage, filter_eps_mo, &
eps_lanczos, eps_pgf_orb, eps_eigval
INTEGER :: t2c_sqrt_order, max_iter_lanczos, flavor, unit_nr_dbcsr, unit_nr, &
min_bsize, max_bsize_MO, t2c_method, nelectron_total, default_bsize_RI, input_flavor
min_bsize, max_bsize_MO, t2c_method, nelectron_total, input_flavor
LOGICAL :: check_2c_inv, calc_condnum

TYPE(libint_potential_type) :: ri_metric
Expand Down Expand Up @@ -440,7 +438,7 @@ MODULE hfx_types
CHARACTER(len=default_string_length) :: orb_basis_type, ri_basis_type

! memory reduction factor
INTEGER :: n_mem_input, n_mem, n_mem_RI
INTEGER :: n_mem_input, n_mem, n_mem_RI, n_mem_flavor_switch

! offsets for memory batches
INTEGER, DIMENSION(:), ALLOCATABLE :: starts_array_mem_block, ends_array_mem_block
Expand Down Expand Up @@ -1138,8 +1136,8 @@ SUBROUTINE hfx_ri_init_read_input(ri_data, ri_section, qs_kind_set, &
CALL section_vals_val_get(ri_section, "EPS_PGF_ORB", r_val=ri_data%eps_pgf_orb)
CALL section_vals_val_get(ri_section, "MIN_BLOCK_SIZE", i_val=ri_data%min_bsize)
CALL section_vals_val_get(ri_section, "MAX_BLOCK_SIZE_MO", i_val=ri_data%max_bsize_MO)
CALL section_vals_val_get(ri_section, "BLOCK_SIZE_RI", i_val=ri_data%default_bsize_RI)
CALL section_vals_val_get(ri_section, "MEMORY_CUT", i_val=ri_data%n_mem_input)
CALL section_vals_val_get(ri_section, "FLAVOR_SWITCH_MEMORY_CUT", i_val=ri_data%n_mem_flavor_switch)

ri_data%orb_basis_type = orb_basis_type
ri_data%ri_basis_type = ri_basis_type
Expand Down Expand Up @@ -1188,8 +1186,7 @@ SUBROUTINE hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para
CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_init'

INTEGER :: handle, i_mem, iproc, j_mem, MO_dim, &
mp_comm_3d, natom, nkind, nproc, &
RI_bsize
mp_comm_3d, natom, nkind, nproc
INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_AO_store, bsizes_RI_store, dist1, &
dist2, dist3, dist_AO_1, dist_AO_2, &
dist_RI
Expand Down Expand Up @@ -1269,21 +1266,9 @@ SUBROUTINE hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para

ri_data%num_pe = para_env%num_pe

!Default RI block size
SELECT CASE (ri_data%default_bsize_RI)
CASE (ri_min_block)
CALL pgf_block_sizes(atomic_kind_set, basis_set_RI, ri_data%min_bsize, ri_data%bsizes_RI_split)

CASE (ri_dense)
!Best GPU dimension is 64*64=4*4*256=4096
RI_bsize = 4096/ri_data%min_bsize**2
!Want at least one block per proc row/col
RI_bsize = MIN(RI_bsize, SUM(ri_data%bsizes_RI)/(FLOOR(SQRT(REAL(ri_data%num_pe, dp)))))
CALL split_block_sizes([SUM(ri_data%bsizes_RI)], ri_data%bsizes_RI_split, RI_bsize)
END SELECT

! initialize tensors expressed in basis representation
CALL pgf_block_sizes(atomic_kind_set, basis_set_AO, ri_data%min_bsize, ri_data%bsizes_AO_split)
CALL pgf_block_sizes(atomic_kind_set, basis_set_RI, ri_data%min_bsize, ri_data%bsizes_RI_split)

CALL pgf_block_sizes(atomic_kind_set, basis_set_AO, 1, bsizes_AO_store)
CALL pgf_block_sizes(atomic_kind_set, basis_set_RI, 1, bsizes_RI_store)
Expand All @@ -1310,6 +1295,7 @@ SUBROUTINE hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para
ALLOCATE (ri_data%pgrid_1)
ALLOCATE (ri_data%pgrid_2)
pdims = 0

CALL dbcsr_t_mp_dims_create(nproc, pdims, [SIZE(ri_data%bsizes_AO_split), SIZE(ri_data%bsizes_RI_split), &
SIZE(ri_data%bsizes_AO_split)])

Expand Down Expand Up @@ -2655,14 +2641,6 @@ SUBROUTINE hfx_print_ri_info(ri_data, hfx_section)
"HFX_RI_INFO| Minimum block size", ri_data%min_bsize
WRITE (UNIT=iw, FMT="(T3, A, T78, I3)") &
"HFX_RI_INFO| MO block size", ri_data%max_bsize_MO
SELECT CASE (ri_data%default_bsize_RI)
CASE (ri_min_block)
WRITE (UNIT=iw, FMT="(T3, A, T74, A)") &
"HFX_RI_INFO| RI block size:", "MINIMAL"
CASE (ri_dense)
WRITE (UNIT=iw, FMT="(T3, A, T76, A)") &
"HFX_RI_INFO| RI block size:", "DENSE"
END SELECT
WRITE (UNIT=iw, FMT="(T3, A, T79, I2)") &
"HFX_RI_INFO| Memory reduction factor", ri_data%n_mem_input
END IF
Expand Down
28 changes: 10 additions & 18 deletions src/input_cp2k_hfx.F
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ MODULE input_cp2k_hfx

LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .FALSE.
CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'input_cp2k_hfx'
INTEGER, PARAMETER, PUBLIC :: ri_mo = 1, ri_pmat = 2, &
ri_min_block = 1, ri_dense = 2
INTEGER, PARAMETER, PUBLIC :: ri_mo = 1, ri_pmat = 2

PUBLIC :: create_hfx_section

Expand Down Expand Up @@ -640,26 +639,19 @@ SUBROUTINE create_hf_ri_section(section)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

CALL keyword_create(keyword, __LOCATION__, name="BLOCK_SIZE_RI", &
description="Default block size in the RI dimension of 3-center tensors. "// &
"Smaller blocks lead to more sparsity, but also more overhead in "// &
"GPU tensor contraction.", &
enum_c_vals=s2a("MINIMAL", "DENSE"), &
enum_desc=s2a("The RI block size is set to match MIN_BLOCK_SIZE for maximum sparsity. "// &
"Ideal for sparse systems with a short range RI_METRIC.", &
"The RI block sizes are such that AOxAOxRI blocks have the ideal "// &
"size for GPU tensor contractions. Comes at the cost of sparsity. "// &
"Better for dense systems and/or diffuse basis sets. "), &
enum_i_vals=(/ri_min_block, ri_dense/), &
default_i_val=ri_min_block)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

CALL keyword_create(keyword, __LOCATION__, name="MEMORY_CUT", &
description="Memory reduction factor. This keyword controls the batching of tensor "// &
"contractions into smaller, more manageable chunks. The details vary "// &
"depending on the RI_FLAVOR.", &
default_i_val=8)
default_i_val=9)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

CALL keyword_create(keyword, __LOCATION__, name="FLAVOR_SWITCH_MEMORY_CUT", &
description="Memory reduction factor to be applied upon RI_FLAVOR switching "// &
"from MO to RHO. The RHO flavor typically requires more memory, "// &
"and depending on the resources available, a higher MEMORY_CUT.", &
default_i_val=9)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

Expand Down
1 change: 0 additions & 1 deletion tests/QS/regtest-hfx-ri-2/CH3-b3lyp-ADMM.inp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
&HF
FRACTION 0.2
&RI
BLOCK_SIZE_RI DENSE
&END
&END
&END XC
Expand Down
1 change: 0 additions & 1 deletion tests/QS/regtest-hfx-ri-2/Ne-hfx-pbc-metric-mo.inp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
&RI
RI_FLAVOR MO
RI_METRIC IDENTITY
BLOCK_SIZE_RI MINIMAL
&END
&INTERACTION_POTENTIAL
POTENTIAL_TYPE SHORTRANGE
Expand Down
1 change: 0 additions & 1 deletion tests/QS/regtest-hfx-ri-2/Ne-hfx-pbc-metric-rho.inp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
&RI
RI_FLAVOR RHO
RI_METRIC IDENTITY
BLOCK_SIZE_RI MINIMAL
&END
&INTERACTION_POTENTIAL
POTENTIAL_TYPE TRUNCATED
Expand Down

0 comments on commit 8d484e4

Please sign in to comment.