Skip to content

Commit

Permalink
collect code optimizations on top of #864 and #868 (#869)
Browse files Browse the repository at this point in the history
* Attempt to avoid zeroing (intermediate) array when setup as a whole. Revised #864 and #868 at the expense of some (minor) code duplication.

* Multiply with reciprocal value rather than dividing by it (fgamma_0).
  • Loading branch information
hfp committed Apr 17, 2020
1 parent 3046fae commit a91bc39
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 35 deletions.
21 changes: 11 additions & 10 deletions src/common/gamma.F
Original file line number Diff line number Diff line change
Expand Up @@ -209,41 +209,42 @@ SUBROUTINE fgamma_0(nmax, t, f)
ELSE

! *** t > 12 ***
tmp = 1.0_dp/t ! reciprocal value

IF (t <= 15.0_dp) THEN

! *** 12 < t <= 15 -> Four term polynom expansion ***

g = 0.4999489092_dp - 0.2473631686_dp/t + &
0.321180909_dp/t**2 - 0.3811559346_dp/t**3
f(0) = 0.5_dp*SQRT(pi/t) - g*EXP(-t)/t
g = 0.4999489092_dp - 0.2473631686_dp*tmp + &
0.321180909_dp*tmp**2 - 0.3811559346_dp*tmp**3
f(0) = 0.5_dp*SQRT(pi*tmp) - g*EXP(-t)*tmp

ELSE IF (t <= 18.0_dp) THEN

! *** 15 < t <= 18 -> Three term polynom expansion ***

g = 0.4998436875_dp - 0.24249438_dp/t + 0.24642845_dp/t**2
f(0) = 0.5_dp*SQRT(pi/t) - g*EXP(-t)/t
g = 0.4998436875_dp - 0.24249438_dp*tmp + 0.24642845_dp*tmp**2
f(0) = 0.5_dp*SQRT(pi*tmp) - g*EXP(-t)*tmp

ELSE IF (t <= 24.0_dp) THEN

! *** 18 < t <= 24 -> Two term polynom expansion ***

g = 0.499093162_dp - 0.2152832_dp/t
f(0) = 0.5_dp*SQRT(pi/t) - g*EXP(-t)/t
g = 0.499093162_dp - 0.2152832_dp*tmp
f(0) = 0.5_dp*SQRT(pi*tmp) - g*EXP(-t)*tmp

ELSE IF (t <= 30.0_dp) THEN

! *** 24 < t <= 30 -> One term polynom expansion ***

g = 0.49_dp
f(0) = 0.5_dp*SQRT(pi/t) - g*EXP(-t)/t
f(0) = 0.5_dp*SQRT(pi*tmp) - g*EXP(-t)*tmp

ELSE

! *** t > 30 -> Asymptotic formula ***

f(0) = 0.5_dp*SQRT(pi/t)
f(0) = 0.5_dp*SQRT(pi*tmp)

END IF

Expand All @@ -257,7 +258,7 @@ SUBROUTINE fgamma_0(nmax, t, f)
! *** generate the remaining F_n(t) values ***

DO n = 1, nmax
f(n) = 0.5_dp*(REAL(2*n - 1, dp)*f(n - 1) - expt)/t
f(n) = (0.5_dp*tmp)*(REAL(2*n - 1, dp)*f(n - 1) - expt)
END DO

END IF
Expand Down
66 changes: 44 additions & 22 deletions src/libint_2c_3c.F
Original file line number Diff line number Diff line change
Expand Up @@ -190,19 +190,30 @@ SUBROUTINE eri_3center(int_abc, la_min, la_max, npgfa, zeta, rpgfa, ra, &
a_mysize(1) = ncoa*ncob*ncoc
CALL cp_libint_get_3eris(li, lj, lk, lib, p_work, a_mysize)

DO k = 1, ncoc
p1 = (k - 1)*ncob
DO j = 1, ncob
p2 = (p1 + j - 1)*ncoa
DO i = 1, ncoa
p3 = p2 + i
int_abc(a_offset + i, b_offset + j, c_offset + k) = p_work(p3)
IF (PRESENT(int_abc_ext)) THEN
DO k = 1, ncoc
p1 = (k - 1)*ncob
DO j = 1, ncob
p2 = (p1 + j - 1)*ncoa
DO i = 1, ncoa
p3 = p2 + i
int_abc(a_offset + i, b_offset + j, c_offset + k) = p_work(p3)
int_abc_ext = MAX(int_abc_ext, ABS(p_work(p3)))
END DO
END DO
END DO
ELSE
DO k = 1, ncoc
p1 = (k - 1)*ncob
DO j = 1, ncob
p2 = (p1 + j - 1)*ncoa
DO i = 1, ncoa
p3 = p2 + i
int_abc(a_offset + i, b_offset + j, c_offset + k) = p_work(p3)
END DO
END DO
IF (PRESENT(int_abc_ext)) THEN
int_abc_ext = MAX(int_abc_ext, MAXVAL(ABS(p_work(p2 + 1:p2 + ncoa))))
ENDIF
END DO
END DO
ENDIF

END DO !lk
END DO !lj
Expand All @@ -224,19 +235,30 @@ SUBROUTINE eri_3center(int_abc, la_min, la_max, npgfa, zeta, rpgfa, ra, &
a_mysize(1) = ncoa*ncob*ncoc
CALL cp_libint_get_3eris(lj, li, lk, lib, p_work, a_mysize)

DO k = 1, ncoc
p1 = (k - 1)*ncoa
DO i = 1, ncoa
p2 = (p1 + i - 1)*ncob
DO j = 1, ncob
p3 = p2 + j
int_abc(a_offset + i, b_offset + j, c_offset + k) = p_work(p3)
IF (PRESENT(int_abc_ext)) THEN
DO k = 1, ncoc
p1 = (k - 1)*ncoa
DO i = 1, ncoa
p2 = (p1 + i - 1)*ncob
DO j = 1, ncob
p3 = p2 + j
int_abc(a_offset + i, b_offset + j, c_offset + k) = p_work(p3)
int_abc_ext = MAX(int_abc_ext, ABS(p_work(p3)))
END DO
END DO
END DO
ELSE
DO k = 1, ncoc
p1 = (k - 1)*ncoa
DO i = 1, ncoa
p2 = (p1 + i - 1)*ncob
DO j = 1, ncob
p3 = p2 + j
int_abc(a_offset + i, b_offset + j, c_offset + k) = p_work(p3)
END DO
END DO
IF (PRESENT(int_abc_ext)) THEN
int_abc_ext = MAX(int_abc_ext, MAXVAL(ABS(p_work(p2 + 1:p2 + ncob))))
ENDIF
END DO
END DO
ENDIF

END DO !lk
END DO !li
Expand Down
5 changes: 2 additions & 3 deletions src/xas_tdp_integrals.F
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,7 @@ SUBROUTINE fill_pqX_tensor(pq_X, ab_nl, ac_nl, basis_set_list_a, basis_set_list_
!pgf integrals
ALLOCATE (sabc(ncoa, ncob, ncoc))
sabc = 0.0_dp
sabc_ext = 0.0_dp
sabc(:, :, :) = 0.0_dp
IF (do_screen) THEN
CALL eri_3center(sabc, la_min(iset), la_max(iset), npgfa(iset), zeta(:, iset), &
Expand All @@ -548,7 +547,7 @@ SUBROUTINE fill_pqX_tensor(pq_X, ab_nl, ac_nl, basis_set_list_a, basis_set_list_
END IF
ALLOCATE (work(nsgfa(iset), nsgfb(jset), nsgfc(kset)))
work = 0.0_dp
!zeroing work is superfluous here (work = 0.0_dp)
CALL abc_contract(work, sabc, &
sphi_a(:, sgfa:), sphi_b(:, sgfb:), sphi_c(:, sgfc:), &
Expand Down

0 comments on commit a91bc39

Please sign in to comment.