Skip to content

Commit

Permalink
Slightly revised build_3c_integrals (#3360)
Browse files Browse the repository at this point in the history
  • Loading branch information
hfp committed Apr 23, 2024
1 parent 1f285aa commit adfb00c
Showing 1 changed file with 68 additions and 74 deletions.
142 changes: 68 additions & 74 deletions src/qs_tensors.F
Expand Up @@ -2409,10 +2409,10 @@ SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
found, skip
REAL(KIND=dp) :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
kind_radius_i, kind_radius_j, &
kind_radius_k, prefac, sijk_ext
kind_radius_k, max_contraction_i, &
prefac, sijk_ext
REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ccp_buffer, cpp_buffer, &
max_contraction_i, max_contraction_j, &
max_contraction_k
max_contraction_j, max_contraction_k
REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: block_t, dummy_block_t, sijk, &
sijk_contr, tmp_ijk
REAL(KIND=dp), DIMENSION(1, 1, 1) :: counter
Expand Down Expand Up @@ -2566,8 +2566,8 @@ SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
END DO
m_max = maxli + maxlj + maxlk

!To minimize expensive memory opsand generally optimize contraction, pre-allocate
!contiguous sphi arrays (and transposed in the cas of sphi_i)
!To minimize expensive memory ops and generally optimize contraction, pre-allocate
!contiguous sphi arrays (and transposed in the case of sphi_i)

NULLIFY (tspj, spi, spk)
ALLOCATE (spi(max_nset, nbasis), tspj(max_nset, nbasis), spk(max_nset, nbasis))
Expand Down Expand Up @@ -2762,22 +2762,13 @@ SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE

ALLOCATE (max_contraction_i(nseti))
max_contraction_i = 0.0_dp
DO iset = 1, nseti
sgfi = first_sgf_i(1, iset)
max_contraction_i(iset) = MAXVAL((/(SUM(ABS(sphi_i(:, i))), i=sgfi, sgfi + nsgfi(iset) - 1)/))
END DO

ALLOCATE (max_contraction_j(nsetj))
max_contraction_j = 0.0_dp
DO jset = 1, nsetj
sgfj = first_sgf_j(1, jset)
max_contraction_j(jset) = MAXVAL((/(SUM(ABS(sphi_j(:, i))), i=sgfj, sgfj + nsgfj(jset) - 1)/))
END DO

ALLOCATE (max_contraction_k(nsetk))
max_contraction_k = 0.0_dp
DO kset = 1, nsetk
sgfk = first_sgf_k(1, kset)
max_contraction_k(kset) = MAXVAL((/(SUM(ABS(sphi_k(:, i))), i=sgfk, sgfk + nsgfk(kset) - 1)/))
Expand All @@ -2791,6 +2782,9 @@ SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
block_not_zero = .FALSE.
DO iset = 1, nseti

sgfi = first_sgf_i(1, iset)
max_contraction_i = MAXVAL((/(SUM(ABS(sphi_i(:, i))), i=sgfi, sgfi + nsgfi(iset) - 1)/))

DO jset = 1, nsetj

IF (set_radius_j(jset) + set_radius_i(iset) + dr_ij < dij) CYCLE
Expand All @@ -2804,73 +2798,73 @@ SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
ncoj = npgfj(jset)*ncoset(lmax_j(jset))
ncok = npgfk(kset)*ncoset(lmax_k(kset))

sgfi = first_sgf_i(1, iset)
sgfj = first_sgf_j(1, jset)
sgfk = first_sgf_k(1, kset)
!ensure non-zero number of triples below
IF (ncoj*ncok*ncoi == 0) CYCLE

IF (ncoj*ncok*ncoi > 0) THEN
ALLOCATE (sijk(ncoj, ncok, ncoi))
sijk(:, :, :) = 0.0_dp
!need positions for libint. Only relative positions are needed => set ri to 0.0
ri = 0.0_dp
rj = rij ! ri + rij
rk = rik ! ri + rik

!need positions for libint. Only relative positions are needed => set ri to 0.0
ri = 0.0_dp
rj = rij ! ri + rij
rk = rik ! ri + rik
ALLOCATE (sijk(ncoj, ncok, ncoi))
IF (op_pos_prv == 1) THEN
sijk(:, :, :) = 0.0_dp
CALL eri_3center(sijk, &
lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
djk, dij, dik, lib, potential_parameter, int_abc_ext=sijk_ext)
ELSE
ALLOCATE (tmp_ijk(ncoi, ncoj, ncok))
tmp_ijk(:, :, :) = 0.0_dp
CALL eri_3center(tmp_ijk, &
lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
dij, dik, djk, lib, potential_parameter, int_abc_ext=sijk_ext)

!F08: sijk = RESHAPE(tmp_ijk, [ncoj, ncok, ncoi], ORDER=[2, 3, 1]) with sijk not allocated
DO i = 1, ncoi !TODO: revise/check for efficiency
sijk(:, :, i) = tmp_ijk(i, :, :)
END DO
DEALLOCATE (tmp_ijk)
END IF

IF (op_pos_prv == 1) THEN
CALL eri_3center(sijk, &
lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
djk, dij, dik, lib, potential_parameter, int_abc_ext=sijk_ext)
ELSE
ALLOCATE (tmp_ijk(ncoi, ncoj, ncok))
tmp_ijk = 0.0_Dp
CALL eri_3center(tmp_ijk, &
lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
dij, dik, djk, lib, potential_parameter, int_abc_ext=sijk_ext)
DO i = 1, ncoi
!TODO: this cannot stay for efficiency (probably)
sijk(:, :, i) = tmp_ijk(i, :, :)
END DO
DEALLOCATE (tmp_ijk)
IF (PRESENT(int_eps)) THEN
IF (int_eps > sijk_ext*(max_contraction_i* &
max_contraction_j(jset)* &
max_contraction_k(kset))) THEN
DEALLOCATE (sijk)
CYCLE
END IF
END IF

IF (PRESENT(int_eps)) THEN
IF (int_eps > sijk_ext*(max_contraction_i(iset)* &
max_contraction_j(jset)* &
max_contraction_k(kset))) THEN
DEALLOCATE (sijk)
CYCLE
END IF
END IF
block_not_zero = .TRUE.
ALLOCATE (sijk_contr(nsgfj(jset), nsgfk(kset), nsgfi(iset)))
CALL libxsmm_abc_contract(sijk_contr, sijk, tspj(jset, jkind)%array, &
spk(kset, kkind)%array, spi(iset, ikind)%array, &
ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
nsgfi(iset), cpp_buffer, ccp_buffer)
DEALLOCATE (sijk)

block_not_zero = .TRUE.
ALLOCATE (sijk_contr(nsgfj(jset), nsgfk(kset), nsgfi(iset)))
CALL libxsmm_abc_contract(sijk_contr, sijk, tspj(jset, jkind)%array, &
spk(kset, kkind)%array, spi(iset, ikind)%array, &
ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
nsgfi(iset), cpp_buffer, ccp_buffer)
DEALLOCATE (sijk)
sgfj = first_sgf_j(1, jset)
sgfk = first_sgf_k(1, kset)

block_start_j = sgfj
block_end_j = sgfj + nsgfj(jset) - 1
block_start_k = sgfk
block_end_k = sgfk + nsgfk(kset) - 1
block_start_i = sgfi
block_end_i = sgfi + nsgfi(iset) - 1
block_start_j = sgfj
block_end_j = sgfj + nsgfj(jset) - 1
block_start_k = sgfk
block_end_k = sgfk + nsgfk(kset) - 1
block_start_i = sgfi
block_end_i = sgfi + nsgfi(iset) - 1

block_t(block_start_j:block_end_j, &
block_start_k:block_end_k, &
block_start_i:block_end_i) = &
block_t(block_start_j:block_end_j, &
block_start_k:block_end_k, &
block_start_i:block_end_i) = &
block_t(block_start_j:block_end_j, &
block_start_k:block_end_k, &
block_start_i:block_end_i) + &
prefac*sijk_contr(:, :, :)
DEALLOCATE (sijk_contr)
END IF ! number of triples > 0
block_start_i:block_end_i) + &
prefac*sijk_contr(:, :, :)
DEALLOCATE (sijk_contr)

END DO

Expand All @@ -2885,9 +2879,9 @@ SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
CALL dbt_get_block(t3c(jcell, kcell), blk_idx, dummy_block_t, found=found)
CPASSERT(found)
END IF
sp = SHAPE(block_t)

sp([2, 3, 1]) = sp
sp = SHAPE(block_t)
sp([2, 3, 1]) = sp ! sp = sp([2, 3, 1]) performs worse

CALL dbt_put_block(t3c(jcell, kcell), blk_idx, sp, &
RESHAPE(block_t, SHAPE=sp, ORDER=[2, 3, 1]), summation=.TRUE.)
Expand All @@ -2897,7 +2891,7 @@ SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
END IF

DEALLOCATE (block_t)
DEALLOCATE (max_contraction_i, max_contraction_j, max_contraction_k)
DEALLOCATE (max_contraction_j, max_contraction_k)
END DO

CALL cp_libint_cleanup_3eri(lib)
Expand Down

0 comments on commit adfb00c

Please sign in to comment.