Skip to content

Commit

Permalink
RI HFX: print DBCSR performance
Browse files Browse the repository at this point in the history
  • Loading branch information
pseewald committed Mar 7, 2020
1 parent 49797a4 commit f483131
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 11 deletions.
72 changes: 61 additions & 11 deletions src/hfx_ri.F
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,13 @@ MODULE hfx_ri
USE iterate_matrix, ONLY: invert_hotelling,&
matrix_sqrt_newton_schulz
USE kinds, ONLY: default_string_length,&
dp
dp,&
int_8
USE machine, ONLY: m_walltime
USE message_passing, ONLY: mp_allgather,&
mp_cart_create,&
mp_environ
mp_environ,&
mp_sync
USE particle_methods, ONLY: get_particle_set
USE particle_types, ONLY: particle_type
USE qs_environment_types, ONLY: get_qs_env,&
Expand Down Expand Up @@ -931,13 +934,15 @@ SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &

INTEGER :: bsum, comm_2d, count, handle, handle2, i_mem, iproc, ispin, max_pdim_mo, n_mem, &
n_mos, nproc, nproc_rem, pdim_AO, pdim_AO_, pdim_AO__, pdim_mo, tdim_mo, unit_nr_dbcsr
INTEGER(int_8) :: nflop
INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2, dist3, mem_end, &
mem_end_block, mem_start, &
mem_start_block, mo_bsizes
INTEGER, ALLOCATABLE, DIMENSION(:, :) :: bounds
INTEGER, DIMENSION(2) :: pdims_2d
INTEGER, DIMENSION(3) :: pcoord, pdims, pdims_AO, pdims_RI
LOGICAL :: do_initialize
REAL(dp) :: t1, t2
TYPE(cp_para_env_type), POINTER :: para_env
TYPE(dbcsr_distribution_type) :: ks_dist
TYPE(dbcsr_t_pgrid_type) :: pgrid, pgrid_2d
Expand Down Expand Up @@ -973,6 +978,9 @@ SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
CALL get_qs_env(qs_env, para_env=para_env)

ALLOCATE (bounds(2, 1))

CALL mp_sync(para_env%group)
t1 = m_walltime()
DO ispin = 1, nspins
CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)

Expand Down Expand Up @@ -1117,7 +1125,10 @@ SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
bounds_2=bounds, &
filter_eps=ri_data%filter_eps_mo/2, &
unit_nr=unit_nr_dbcsr, &
move_data=.FALSE.)
move_data=.FALSE., &
flop=nflop)

ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop

CALL timestop(handle2)
CALL timeset(routineN//"_copy_1", handle2)
Expand All @@ -1137,7 +1148,11 @@ SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
bounds_2=bounds, &
filter_eps=ri_data%filter_eps_mo/2, &
unit_nr=unit_nr_dbcsr, &
move_data=.FALSE.)
move_data=.FALSE., &
flop=nflop)

ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop

CALL timestop(handle2)
CALL timeset(routineN//"_copy_1", handle2)
CALL dbcsr_t_copy(t_3c_int_mo_2(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[2, 1, 3], &
Expand All @@ -1163,7 +1178,11 @@ SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
contract_2=[1], notcontract_2=[2, 3], &
map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
pgrid_opt_3=pgrid_opt_RI, &
unit_nr=unit_nr_dbcsr)
unit_nr=unit_nr_dbcsr, &
flop=nflop)

ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop

CALL timestop(handle2)

CALL timeset(routineN//"_copy_2", handle2)
Expand All @@ -1185,7 +1204,11 @@ SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
contract_2=[1, 2], notcontract_2=[3], &
map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
pgrid_opt_1=pgrid_opt_KS, &
unit_nr=unit_nr_dbcsr, move_data=.TRUE.)
unit_nr=unit_nr_dbcsr, move_data=.TRUE., &
flop=nflop)

ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop

CALL timestop(handle2)
ENDDO

Expand Down Expand Up @@ -1216,6 +1239,11 @@ SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &

ENDDO

CALL mp_sync(para_env%group)
t2 = m_walltime()

ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1

CALL timestop(handle)

END SUBROUTINE
Expand Down Expand Up @@ -1246,21 +1274,25 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
INTEGER :: col, handle, handle2, iblk, iblk_filter, &
iblkrow, ispin, nblk, nblk_filter, &
row, row_end, row_start, unit_nr_dbcsr
INTEGER(int_8) :: nflop
INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2
INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ctr_ind, ctr_ind_tmp
LOGICAL :: found
REAL(dp) :: t1, t2
TYPE(cp_para_env_type), POINTER :: para_env
TYPE(dbcsr_t_pgrid_type), POINTER :: pgrid_opt
TYPE(dbcsr_t_type) :: ks_t, ks_tmp, rho_ao_t, rho_ao_tmp, &
t_3c_1, t_3c_2

CALL timeset(routineN, handle)

NULLIFY (pgrid_opt)
NULLIFY (pgrid_opt, para_env)

! get a useful output_unit

unit_nr_dbcsr = ri_data%unit_nr_dbcsr

CALL get_qs_env(qs_env, para_env=para_env)

CPASSERT(SIZE(ks_matrix, 2) == 1)

IF (geometry_did_change) THEN
Expand Down Expand Up @@ -1289,6 +1321,8 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
CALL dbcsr_t_create(ri_data%t_3c_int_ctr_1(1, 1), t_3c_1)
CALL dbcsr_t_create(ri_data%t_3c_int_ctr_3(1, 1), t_3c_2)

CALL mp_sync(para_env%group)
t1 = m_walltime()
DO ispin = 1, nspins
CALL dbcsr_t_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
CALL dbcsr_t_copy(rho_ao_tmp, rho_ao_t, move_data=.TRUE.)
Expand All @@ -1300,7 +1334,11 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
contract_1=[2], notcontract_1=[1], &
contract_2=[3], notcontract_2=[1, 2], &
map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
unit_nr=unit_nr_dbcsr)
unit_nr=unit_nr_dbcsr, &
flop=nflop)

ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop

CALL dbcsr_t_clear(rho_ao_t)

CALL timestop(handle2)
Expand Down Expand Up @@ -1360,7 +1398,10 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
unit_nr=unit_nr_dbcsr, &
retain_sparsity=.TRUE., &
pgrid_opt_2=pgrid_opt)
pgrid_opt_2=pgrid_opt, &
flop=nflop)

ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
CALL dbcsr_t_clear(ri_data%t_3c_int_ctr_3(1, 1))

CALL dbcsr_t_filter(t_3c_2, ri_data%filter_eps)
Expand All @@ -1385,7 +1426,11 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
contract_2=[1, 2], notcontract_2=[3], &
map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
unit_nr=unit_nr_dbcsr, &
pgrid_opt_1=pgrid_opt)
pgrid_opt_1=pgrid_opt, &
flop=nflop)

ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop

CALL dbcsr_t_clear(t_3c_1)
CALL timestop(handle2)

Expand All @@ -1402,6 +1447,11 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
ri_data%pgrid_1 => pgrid_opt
ENDDO

CALL mp_sync(para_env%group)
t2 = m_walltime()

ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1

CALL dbcsr_t_destroy(t_3c_1)
CALL dbcsr_t_destroy(t_3c_2)

Expand Down
33 changes: 33 additions & 0 deletions src/hfx_types.F
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,11 @@ MODULE hfx_types
! relevant non-zero RI-AO pairs
INTEGER, DIMENSION(:, :), ALLOCATABLE :: nonzero_pairs
INTEGER, DIMENSION(:), ALLOCATABLE :: nonzero_rows

INTEGER(int_8) :: dbcsr_nflop
REAL(dp) :: dbcsr_time
INTEGER :: num_pe

END TYPE

! **************************************************************************************************
Expand Down Expand Up @@ -1345,10 +1350,36 @@ SUBROUTINE hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para
[1], [2, 3], name="(AO | RI AO)")

ENDIF

ri_data%dbcsr_nflop = 0
ri_data%dbcsr_time = 0.0_dp
ri_data%num_pe = para_env%num_pe

CALL timestop(handle)

END SUBROUTINE

! **************************************************************************************************
!> \brief Print the DBCSR performance statistics accumulated in ri_data for RI-HFX:
!>        total number of flops, total execution time and flop rate per MPI rank.
!> \param ri_data RI-HFX data; only unit_nr, dbcsr_nflop, dbcsr_time and num_pe are read
!> \note Output is written only if ri_data%unit_nr > 0.
!>       The reported total flop count is dbcsr_nflop scaled by num_pe, i.e. dbcsr_nflop is
!>       treated as a per-rank counter (consistent with the "Gflops / MPI rank" label).
!>       If no time has been accumulated (dbcsr_time <= 0) a flop rate of 0 is reported
!>       instead of dividing by zero.
! **************************************************************************************************
SUBROUTINE hfx_ri_write_stats(ri_data)
TYPE(hfx_ri_type), INTENT(IN) :: ri_data

REAL(dp) :: my_flop_rate, total_nflop

ASSOCIATE (unit_nr=>ri_data%unit_nr, dbcsr_nflop=>ri_data%dbcsr_nflop, &
dbcsr_time=>ri_data%dbcsr_time, num_pe=>ri_data%num_pe)

   ! dbcsr_time is initialised to zero and stays zero if this routine is reached
   ! before any timed contraction was executed; guard the division in that case.
   IF (dbcsr_time > 0.0_dp) THEN
      my_flop_rate = REAL(dbcsr_nflop, dp)/(1.0E09_dp*dbcsr_time)
   ELSE
      my_flop_rate = 0.0_dp
   ENDIF

   ! Convert to REAL before scaling so that the product cannot overflow the
   ! 8-byte integer range for large rank counts / long runs.
   total_nflop = REAL(dbcsr_nflop, dp)*REAL(num_pe, dp)

   IF (unit_nr > 0) THEN
      WRITE (UNIT=unit_nr, FMT="(/T2,A,T73,ES8.2)") &
         "RI-HFX PERFORMANCE| DBCSR total number of flops:", total_nflop
      WRITE (UNIT=unit_nr, FMT="(T2,A,T66,F15.2)") &
         "RI-HFX PERFORMANCE| DBCSR total execution time:", dbcsr_time
      WRITE (UNIT=unit_nr, FMT="(T2,A,T66,F15.2)") &
         "RI-HFX PERFORMANCE| DBCSR flop rate (Gflops / MPI rank):", my_flop_rate
   ENDIF

END ASSOCIATE
END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param ri_data ...
Expand All @@ -1363,6 +1394,8 @@ SUBROUTINE hfx_ri_release(ri_data)

CALL timeset(routineN, handle)

CALL hfx_ri_write_stats(ri_data)

IF (ASSOCIATED(ri_data%pgrid)) THEN
CALL dbcsr_t_pgrid_destroy(ri_data%pgrid)
DEALLOCATE (ri_data%pgrid)
Expand Down

0 comments on commit f483131

Please sign in to comment.