Skip to content

Commit

Permalink
low-scaling peridioc: reduce memory consumption and computational cos…
Browse files Browse the repository at this point in the history
…t of N^3 scaling step
  • Loading branch information
JWilhelm authored and oschuett committed Oct 17, 2023
1 parent 80abb51 commit 34a6a20
Show file tree
Hide file tree
Showing 40 changed files with 657 additions and 520 deletions.
631 changes: 306 additions & 325 deletions src/gw_methods.F

Large diffs are not rendered by default.

130 changes: 33 additions & 97 deletions src/gw_utils.F
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ MODULE gw_utils
cp_fm_struct_release,&
cp_fm_struct_type
USE cp_fm_types, ONLY: cp_fm_create,&
cp_fm_set_all,&
cp_fm_to_fm
USE cp_log_handling, ONLY: cp_get_default_logger,&
cp_logger_type
Expand Down Expand Up @@ -206,12 +207,11 @@ SUBROUTINE read_gw_input_parameters(bs_env, bs_sec)

CALL section_vals_val_get(gw_sec, "NUM_TIME_FREQ_POINTS", i_val=bs_env%num_time_freq_points)
CALL section_vals_val_get(gw_sec, "EPS_FILTER", r_val=bs_env%eps_filter)
! CALL section_vals_val_get(gw_sec, "EPS_3C_INT", r_val=bs_env%eps_3c_int)
CALL section_vals_val_get(gw_sec, "SIZE_LATTICE_SUM_V", i_val=bs_env%size_lattice_sum_V)
CALL section_vals_val_get(gw_sec, "MEMORY_PER_PROC", i_val=bs_env%input_memory_per_proc_GB)
CALL section_vals_val_get(gw_sec, "GROUP_SIZE_TENSOR", i_val=bs_env%group_size_tensor)
! CALL section_vals_val_get(gw_sec, "GROUP_SIZE_DIAG", i_val=bs_env%group_size_diag)
CALL section_vals_val_get(gw_sec, "KPOINTS_CHI_EPS_W", i_vals=bs_env%nkp_grid_chi_eps_W_input)
CALL section_vals_val_get(gw_sec, "APPROX_KP_EXTRAPOL", l_val=bs_env%approx_kp_extrapol)

CALL timestop(handle)

Expand Down Expand Up @@ -358,7 +358,7 @@ SUBROUTINE setup_kpoints_chi_eps_W(qs_env, bs_env, kpoints)
bs_env%nkp_grid_chi_eps_W_extra(1:3) = nkp_grid_extra(1:3)
bs_env%nkp_chi_eps_W_orig = nkp_orig
bs_env%nkp_chi_eps_W_extra = nkp_extra
bs_env%nkp_chi_eps_W_total = nkp
bs_env%nkp_chi_eps_W_orig_plus_extra = nkp

ALLOCATE (kpoints%xkp(3, nkp), kpoints%wkp(nkp))

Expand All @@ -376,11 +376,19 @@ SUBROUTINE setup_kpoints_chi_eps_W(qs_env, bs_env, kpoints)
CALL compute_xkp(kpoints%xkp, 1, nkp_orig, nkp_grid)
CALL compute_xkp(kpoints%xkp, nkp_orig + 1, nkp, nkp_grid_extra)

IF (bs_env%approx_kp_extrapol) THEN
bs_env%wkp_no_extra = 1.0_dp/REAL(nkp_orig, KIND=dp)
END IF

CALL kpoint_init_cell_index_simple(kpoints, qs_env)

! JW 2do: Parallelization over k-points (diagonalization of χ(iω,k) and further steps)
bs_env%ikp_local_start = 1
bs_env%ikp_local_end = kpoints%nkp
! heuristic parameter: how many k-points for χ, ε, and W are used simultaneously
! (less simultaneous k-points: less memory, but more computational effort because of
! recomputation of V(k))
bs_env%nkp_chi_eps_W_batch = 4

bs_env%num_chi_eps_W_batches = (bs_env%nkp_chi_eps_W_orig_plus_extra - 1)/ &
bs_env%nkp_chi_eps_W_batch + 1

u = bs_env%unit_nr

Expand All @@ -389,6 +397,8 @@ SUBROUTINE setup_kpoints_chi_eps_W(qs_env, bs_env, kpoints)
WRITE (UNIT=u, FMT="(T2,1A,T71,3I4)") "K-point mesh 1 for χ, ε, W", nkp_grid(1:3)
WRITE (UNIT=u, FMT="(T2,2A,T71,3I4)") "K-point mesh 2 for χ, ε, W ", &
"(for k-point extrapolation of W)", nkp_grid_extra(1:3)
WRITE (UNIT=u, FMT="(T2,A,T80,L)") "Approximate the k-point extrapolation", &
bs_env%approx_kp_extrapol
END IF

CALL timestop(handle)
Expand Down Expand Up @@ -474,8 +484,7 @@ SUBROUTINE allocate_and_fill_matrices_and_arrays(qs_env, bs_env)
CHARACTER(LEN=*), PARAMETER :: routineN = 'allocate_and_fill_matrices_and_arrays'

INTEGER :: handle, i_t, num_time_freq_points
TYPE(cp_blacs_env_type), POINTER :: blacs_env, blacs_env_kp_diag, &
blacs_env_tensor
TYPE(cp_blacs_env_type), POINTER :: blacs_env, blacs_env_tensor
TYPE(cp_fm_struct_type), POINTER :: fm_struct, fm_struct_RI_global
TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_s
TYPE(mp_para_env_type), POINTER :: para_env
Expand All @@ -500,6 +509,12 @@ SUBROUTINE allocate_and_fill_matrices_and_arrays(qs_env, bs_env)
CALL cp_fm_create(bs_env%fm_RI_RI, fm_struct_RI_global)
CALL cp_fm_create(bs_env%fm_chi_Gamma_freq, fm_struct_RI_global)
CALL cp_fm_create(bs_env%fm_W_MIC_freq, fm_struct_RI_global)
IF (bs_env%approx_kp_extrapol) THEN
CALL cp_fm_create(bs_env%fm_W_MIC_freq_1_extra, fm_struct_RI_global)
CALL cp_fm_create(bs_env%fm_W_MIC_freq_1_no_extra, fm_struct_RI_global)
CALL cp_fm_set_all(bs_env%fm_W_MIC_freq_1_extra, 0.0_dp)
CALL cp_fm_set_all(bs_env%fm_W_MIC_freq_1_no_extra, 0.0_dp)
END IF
CALL cp_fm_struct_release(fm_struct_RI_global)

ALLOCATE (bs_env%eigenval_G0W0(bs_env%n_ao, bs_env%nkp_DOS))
Expand All @@ -516,17 +531,6 @@ SUBROUTINE allocate_and_fill_matrices_and_arrays(qs_env, bs_env)
NULLIFY (blacs_env_tensor)
CALL cp_blacs_env_create(blacs_env=blacs_env_tensor, para_env=bs_env%para_env_tensor)

NULLIFY (blacs_env_kp_diag)
CALL cp_blacs_env_create(blacs_env=blacs_env_kp_diag, para_env=bs_env%para_env_kp_diag)

! JW TODO later cfm matrices in kp diag subgroups
! NULLIFY (fm_struct_RI_kp_diag)
! CALL cp_fm_struct_create(fm_struct_RI_kp_diag, context=blacs_env_kp_diag, &
! nrow_global=bs_env%n_RI, ncol_global=bs_env%n_RI, &
! para_env=bs_env%para_env_tensor)
!
! CALL cp_fm_struct_release(fm_struct_RI_kp_diag)

! allocate dbcsr matrices in the tensor subgroup; actually, one only needs a small
! subset of blocks in the tensor subgroup, however, all atomic blocks are allocated.
! One might think of creating a dbcsr matrix with only the blocks that are needed
Expand All @@ -541,7 +545,6 @@ SUBROUTINE allocate_and_fill_matrices_and_arrays(qs_env, bs_env)
blacs_env, do_ri_aux_basis=.TRUE.)

CALL cp_blacs_env_release(blacs_env_tensor)
CALL cp_blacs_env_release(blacs_env_kp_diag)

NULLIFY (bs_env%mat_chi_Gamma_tau)
CALL dbcsr_allocate_matrix_set(bs_env%mat_chi_Gamma_tau, bs_env%num_time_freq_points)
Expand Down Expand Up @@ -965,13 +968,12 @@ SUBROUTINE check_for_restart_files(qs_env, bs_env)

CHARACTER(LEN=*), PARAMETER :: routineN = 'check_for_restart_files'

CHARACTER(LEN=default_path_length) :: f_chi, f_Goc, f_Gvi, f_MV, f_S_n, f_S_p, &
f_S_x, f_Vs, f_W_t, f_W_w, prefix, &
project_name
INTEGER :: handle, i_t_or_w, ikp_re_im, &
num_time_freq_points
LOGICAL :: chi_exists, G_occ_exists, G_vir_exists, MV_exists, Sigma_neg_time_exists, &
Sigma_pos_time_exists, Vs_exists, W_time_exists
CHARACTER(LEN=default_path_length) :: f_chi, f_Goc, f_Gvi, f_S_n, f_S_p, &
f_S_x, f_W_t, prefix, project_name
INTEGER :: handle, i_t_or_w, num_time_freq_points
LOGICAL :: chi_exists, G_occ_exists, G_vir_exists, &
Sigma_neg_time_exists, &
Sigma_pos_time_exists, W_time_exists
TYPE(cp_logger_type), POINTER :: logger
TYPE(section_vals_type), POINTER :: input, print_key

Expand All @@ -981,8 +983,6 @@ SUBROUTINE check_for_restart_files(qs_env, bs_env)

ALLOCATE (bs_env%read_chi_and_G_occ_vir(num_time_freq_points))
ALLOCATE (bs_env%calc_chi_and_G_occ_vir(num_time_freq_points))
ALLOCATE (bs_env%read_W_freq(num_time_freq_points))
ALLOCATE (bs_env%calc_W_freq(num_time_freq_points))
ALLOCATE (bs_env%Sigma_c_exists(num_time_freq_points))

CALL get_qs_env(qs_env, input=input)
Expand All @@ -994,22 +994,21 @@ SUBROUTINE check_for_restart_files(qs_env, bs_env)
WRITE (prefix, '(2A)') TRIM(project_name), "-RESTART_"
bs_env%prefix = prefix

bs_env%read_all_W_time = .TRUE.
bs_env%all_W_exist = .TRUE.

DO i_t_or_w = 1, num_time_freq_points

IF (i_t_or_w < 10) THEN
WRITE (f_chi, '(3A,I1,A)') TRIM(prefix), bs_env%chi_name, "_00", i_t_or_w, ".matrix"
WRITE (f_Goc, '(3A,I1,A)') TRIM(prefix), bs_env%Gocc_name, "_00", i_t_or_w, ".matrix"
WRITE (f_Gvi, '(3A,I1,A)') TRIM(prefix), bs_env%Gvir_name, "_00", i_t_or_w, ".matrix"
WRITE (f_W_w, '(3A,I1,A)') TRIM(prefix), bs_env%W_freq_name, "_00", i_t_or_w, ".matrix"
WRITE (f_W_t, '(3A,I1,A)') TRIM(prefix), bs_env%W_time_name, "_00", i_t_or_w, ".matrix"
WRITE (f_S_p, '(3A,I1,A)') TRIM(prefix), bs_env%Sigma_p_name, "_00", i_t_or_w, ".matrix"
WRITE (f_S_n, '(3A,I1,A)') TRIM(prefix), bs_env%Sigma_n_name, "_00", i_t_or_w, ".matrix"
ELSE IF (i_t_or_w < 100) THEN
WRITE (f_chi, '(3A,I2,A)') TRIM(prefix), bs_env%chi_name, "_0", i_t_or_w, ".matrix"
WRITE (f_Goc, '(3A,I2,A)') TRIM(prefix), bs_env%Gocc_name, "_0", i_t_or_w, ".matrix"
WRITE (f_Gvi, '(3A,I2,A)') TRIM(prefix), bs_env%Gvir_name, "_0", i_t_or_w, ".matrix"
WRITE (f_W_w, '(3A,I2,A)') TRIM(prefix), bs_env%W_freq_name, "_0", i_t_or_w, ".matrix"
WRITE (f_W_t, '(3A,I2,A)') TRIM(prefix), bs_env%W_time_name, "_0", i_t_or_w, ".matrix"
WRITE (f_S_p, '(3A,I2,A)') TRIM(prefix), bs_env%Sigma_p_name, "_0", i_t_or_w, ".matrix"
WRITE (f_S_n, '(3A,I2,A)') TRIM(prefix), bs_env%Sigma_n_name, "_0", i_t_or_w, ".matrix"
Expand All @@ -1020,68 +1019,27 @@ SUBROUTINE check_for_restart_files(qs_env, bs_env)
INQUIRE (file=TRIM(f_chi), exist=chi_exists)
INQUIRE (file=TRIM(f_Goc), exist=G_occ_exists)
INQUIRE (file=TRIM(f_Gvi), exist=G_vir_exists)
INQUIRE (file=TRIM(f_W_w), exist=bs_env%read_W_freq(i_t_or_w))
INQUIRE (file=TRIM(f_W_t), exist=W_time_exists)
INQUIRE (file=TRIM(f_S_p), exist=Sigma_pos_time_exists)
INQUIRE (file=TRIM(f_S_n), exist=Sigma_neg_time_exists)

bs_env%read_chi_and_G_occ_vir(i_t_or_w) = chi_exists .AND. G_occ_exists .AND. G_vir_exists
bs_env%calc_chi_and_G_occ_vir(i_t_or_w) = .NOT. bs_env%read_chi_and_G_occ_vir(i_t_or_w)

bs_env%calc_W_freq(i_t_or_w) = .NOT. bs_env%read_W_freq(i_t_or_w)

bs_env%read_all_W_time = W_time_exists .AND. bs_env%read_all_W_time
bs_env%all_W_exist = bs_env%all_W_exist .AND. W_time_exists

bs_env%Sigma_c_exists(i_t_or_w) = Sigma_pos_time_exists .AND. Sigma_neg_time_exists

IF (bs_env%Sigma_c_exists(i_t_or_w)) THEN
bs_env%read_chi_and_G_occ_vir(i_t_or_w) = .FALSE.
bs_env%calc_chi_and_G_occ_vir(i_t_or_w) = .FALSE.
bs_env%read_W_freq(i_t_or_w) = .FALSE.
bs_env%calc_W_freq(i_t_or_w) = .FALSE.
END IF

END DO

bs_env%calc_all_W_time = .NOT. bs_env%read_all_W_time

bs_env%read_MinvVsqrt_Vsqrt = .TRUE.

DO ikp_re_im = 1, 2*bs_env%nkp_chi_eps_W_total

IF (ikp_re_im < 10) THEN
WRITE (f_MV, '(3A,I1,A)') TRIM(prefix), bs_env%MV_name, "_00", ikp_re_im, ".matrix"
WRITE (f_Vs, '(3A,I1,A)') TRIM(prefix), bs_env%Vs_name, "_00", ikp_re_im, ".matrix"
ELSE IF (ikp_re_im < 100) THEN
WRITE (f_MV, '(3A,I2,A)') TRIM(prefix), bs_env%MV_name, "_0", ikp_re_im, ".matrix"
WRITE (f_Vs, '(3A,I2,A)') TRIM(prefix), bs_env%Vs_name, "_0", ikp_re_im, ".matrix"
ELSE IF (ikp_re_im < 1000) THEN
WRITE (f_MV, '(3A,I3,A)') TRIM(prefix), bs_env%MV_name, "_", ikp_re_im, ".matrix"
WRITE (f_Vs, '(3A,I3,A)') TRIM(prefix), bs_env%Vs_name, "_", ikp_re_im, ".matrix"
ELSE
CPABORT('Please implement more than 999 kpoints.')
END IF

INQUIRE (file=TRIM(f_MV), exist=MV_exists)
INQUIRE (file=TRIM(f_Vs), exist=Vs_exists)

! matrices MV and VS need to be present for all k-points; otherwise no restart
bs_env%read_MinvVsqrt_Vsqrt = MV_exists .AND. Vs_exists .AND. bs_env%read_MinvVsqrt_Vsqrt

END DO

bs_env%calc_MinvVsqrt_Vsqrt = .NOT. bs_env%read_MinvVsqrt_Vsqrt

IF (bs_env%read_all_W_time) bs_env%read_MinvVsqrt_Vsqrt = .FALSE.

WRITE (f_S_x, '(3A)') TRIM(prefix), bs_env%Sigma_x_name, "_000.matrix"
INQUIRE (file=TRIM(f_S_x), exist=bs_env%Sigma_x_exists)

IF (ALL(bs_env%Sigma_c_exists)) THEN
bs_env%read_all_W_time = .FALSE.
bs_env%calc_all_W_time = .FALSE.
END IF

CALL timestop(handle)

END SUBROUTINE check_for_restart_files
Expand Down Expand Up @@ -1137,34 +1095,15 @@ SUBROUTINE set_parallelization_parameters(qs_env, bs_env)
dummy_1, dummy_2, color_sub, bs_env)
END DO

IF (bs_env%group_size_diag == -1) THEN
IF (num_pe < bs_env%nkp_chi_eps_W_total) THEN
bs_env%group_size_diag = num_pe
bs_env%num_diag_groups = 1
ELSE
bs_env%group_size_diag = num_pe/bs_env%nkp_chi_eps_W_total
bs_env%num_diag_groups = num_pe/bs_env%group_size_diag
END IF
END IF
bs_env%group_size_diag = num_pe

color_sub = para_env%mepos/bs_env%group_size_diag
bs_env%diag_group_color = color_sub

ALLOCATE (bs_env%para_env_kp_diag)
CALL bs_env%para_env_kp_diag%from_split(para_env, color_sub)

CALL m_memory(mem)
CALL bs_env%para_env%max(mem)

bs_env%input_memory_per_proc = INT(bs_env%input_memory_per_proc_GB*1.0E9_dp)
bs_env%input_memory_per_proc = INT(bs_env%input_memory_per_proc_GB*1.0E9_dp, KIND=int_8)

u = bs_env%unit_nr
IF (u > 0) THEN
WRITE (u, '(T2,A)') ''
WRITE (u, '(T2,A,I47)') 'Group size for tensor operations', bs_env%group_size_tensor
WRITE (u, '(T2,2A,I9)') 'Group size for diagonalizing χ(k) and ', &
'calculating ε(k), W(k) and W^MIC', bs_env%group_size_diag
WRITE (u, '(T2,A)') ''
WRITE (u, '(T2,A,I37,A)') 'Input: Available memory per MPI process', &
bs_env%input_memory_per_proc_GB, ' GB'
Expand Down Expand Up @@ -1370,9 +1309,6 @@ SUBROUTINE set_heuristic_parameters(bs_env, qs_env)
bs_env%regularization_RI = 1.0E-2_dp
END IF

! use all processors to invert ε_PQ(iω,k)
bs_env%group_size_diag = -1

! truncated Coulomb operator for exchange self-energy
! (see details in Guidon, VandeVondele, Hutter, JCTC 5, 3010 (2009) and references therein)
CALL setup_trunc_coulomb_pot_for_exchange_self_energy(qs_env, bs_env%trunc_coulomb, &
Expand Down
15 changes: 12 additions & 3 deletions src/input_cp2k_properties_dft.F
Original file line number Diff line number Diff line change
Expand Up @@ -2041,10 +2041,10 @@ SUBROUTINE create_gw_section(section)
CALL keyword_create(keyword, __LOCATION__, name="SIZE_LATTICE_SUM_V", &
description="Determines number of cells used for summing the "// &
"cells R in the Coulomb matrix, "// &
"V_PQ(k) = \sum_R <P,cell=0 | 1/r | Q,cell=R>. SIZE_LATTICE_SUM_V 5 "// &
"V_PQ(k) = \sum_R <P,cell=0 | 1/r | Q,cell=R>. SIZE_LATTICE_SUM_V 4 "// &
"gives excellent convergence, parameter does not need to be touched.", &
usage="SIZE_LATTICE_SUM_V 4", &
default_i_val=5)
default_i_val=4)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

Expand Down Expand Up @@ -2073,7 +2073,7 @@ SUBROUTINE create_gw_section(section)
CALL keyword_create(keyword, __LOCATION__, name="GROUP_SIZE_TENSOR", &
description="Specify the number of MPI processes for a group "// &
"performing tensor operations. If '-1', automatic choice which "// &
"should be good enough for most cases. ", &
"should be good enough for most cases.", &
usage="GROUP_SIZE_TENSOR 16", &
default_i_val=-1)
CALL section_add_keyword(section, keyword)
Expand All @@ -2096,6 +2096,15 @@ SUBROUTINE create_gw_section(section)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

CALL keyword_create(keyword, __LOCATION__, name="APPROX_KP_EXTRAPOL", &
description="If true, use only a 2x2 kpoint mesh for frequency "// &
"points ω_j, j >= 2, and get the k-point extrapolation of "// &
"W_PQ(iω_j,q) approximately from W_PQ(iω_1,q).", &
usage="APPROX_KP_EXTRAPOL", &
default_l_val=.FALSE., lone_keyword_l_val=.TRUE.)
CALL section_add_keyword(section, keyword)
CALL keyword_release(keyword)

NULLIFY (subsection, print_key)
CALL section_create(subsection, __LOCATION__, name="PRINT", &
description="Printing of GW restarts.", &
Expand Down

0 comments on commit 34a6a20

Please sign in to comment.