Skip to content

Commit

Permalink
XAS_TDP| Full TDDFT refactorization: stop using iterative sqrt and
Browse files Browse the repository at this point in the history
inverse for matrix
  • Loading branch information
abussy authored and pseewald committed Oct 9, 2019
1 parent af9eff1 commit ac40d9b
Show file tree
Hide file tree
Showing 8 changed files with 2,266 additions and 1,671 deletions.
7 changes: 4 additions & 3 deletions src/cp_dbcsr_operations.F
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
!>
!> <b>Modification history:</b>
!> - Created 2009-05-12
!> - Generalized sm_fm_mulitply for matrices w/ different row/block size (A. Bussy, 11.2018)
! **************************************************************************************************
MODULE cp_dbcsr_operations
USE mathlib, ONLY: lcm, gcd
Expand Down Expand Up @@ -567,7 +568,7 @@ SUBROUTINE cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
INTEGER, DIMENSION(:), POINTER :: col_blk_size_right_in, &
col_blk_size_right_out, row_blk_size,&
!row_cluster, col_cluster,&
row_dist, col_dist
row_dist, col_dist, col_blk_size
TYPE(dbcsr_type) :: in, out
REAL(dp) :: my_alpha, my_beta
TYPE(dbcsr_distribution_type) :: dist, dist_right_in, product_dist
Expand Down Expand Up @@ -597,11 +598,11 @@ SUBROUTINE cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
! 'alpha',my_alpha,'beta',my_beta

IF (ncol .GT. 0 .AND. k_out .GT. 0 .AND. k_in .GT. 0) THEN
CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, distribution=dist)
CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, col_blk_size=col_blk_size, distribution=dist)
CALL dbcsr_create_dist_r_unrot(dist_right_in, dist, k_in, col_blk_size_right_in)

CALL dbcsr_create(in, "D", dist_right_in, dbcsr_type_no_symmetry, &
row_blk_size, col_blk_size_right_in, nze=0)
col_blk_size, col_blk_size_right_in, nze=0)

CALL dbcsr_distribution_get(dist, row_dist=row_dist)
CALL dbcsr_distribution_get(dist_right_in, col_dist=col_dist)
Expand Down
5 changes: 2 additions & 3 deletions src/generic_os_integrals.F
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,8 @@ END SUBROUTINE int_operator_ab_os_low
! **************************************************************************************************
SUBROUTINE int_overlap_ab_os(sab, dsab, rab, fba, fbb, calculate_forces, debug, dmax)

REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
INTENT(INOUT) :: sab
REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT) :: sab
REAL(KIND=dp), DIMENSION(:, :, :), &
INTENT(INOUT), OPTIONAL :: dsab
REAL(KIND=dp), DIMENSION(3), INTENT(IN) :: rab
TYPE(gto_basis_set_type), POINTER :: fba, fbb
Expand Down
230 changes: 221 additions & 9 deletions src/qs_o3c_methods.F
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
!> \brief Methods used with 3-center overlap type integrals containers
!> \par History
!> - none
!> - 11.2018 Added calculate_o3c_coulomb_integrals for XAS_TDP [A.Bussy]
!> + fixed OMP race condition in contract3_o3c routine
! **************************************************************************************************
MODULE qs_o3c_methods
USE ai_contraction_sphi, ONLY: abc_contract
USE ai_coulomb, ONLY: coulomb3
USE ai_overlap3, ONLY: overlap3
USE basis_set_types, ONLY: gto_basis_set_p_type,&
gto_basis_set_type
Expand All @@ -31,7 +34,8 @@ MODULE qs_o3c_methods

CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_o3c_methods'

PUBLIC :: calculate_o3c_integrals, contract12_o3c, contract3_o3c
PUBLIC :: calculate_o3c_integrals, contract12_o3c, contract3_o3c, &
calculate_o3c_coulomb_integrals

CONTAINS

Expand Down Expand Up @@ -303,6 +307,200 @@ SUBROUTINE calculate_o3c_integrals(o3c, calculate_forces, matrix_p)

END SUBROUTINE calculate_o3c_integrals

! **************************************************************************************************
!> \brief Routine to compute the 3-center Coulomb integrals
!> \param o3c ...
!> \notes This is a temporary solution. In the future, as LIBINT2 will be used and different
!> operators (also with forces) will be available, the main routine calculate_o3c_integrals
!> will be modified with additional arguments
!> It's the user responsibility to provide smart neighbor lists in the first place to avoid
!> over calculations
! **************************************************************************************************
SUBROUTINE calculate_o3c_coulomb_integrals(o3c)
TYPE(o3c_container_type), POINTER :: o3c
CHARACTER(LEN=*), PARAMETER :: routineN = 'calculate_o3c_coulomb_integrals', &
routineP = moduleN//':'//routineN
INTEGER :: egfa, egfb, egfc, endc, handle, ikind, ipgfc, iset, jkind, jset, kkind, kset, &
mepos, ncoa, ncob, ncoc, ni, nj, nk, nseta, nsetb, nsetc, nspin, nthread, sgfa, sgfb, &
sgfc, startc
INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, &
lc_min, npgfa, npgfb, npgfc, nsgfa, &
nsgfb, nsgfc
INTEGER, DIMENSION(:, :), POINTER :: first_sgfa, first_sgfb, first_sgfc
REAL(dp), ALLOCATABLE, DIMENSION(:) :: f, gccc, rpgfa, rpgfb
REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: vabc
REAL(dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: v
REAL(KIND=dp) :: dij2, dik2, djk2, rpgfc
REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: sabc, sabc_contr
REAL(KIND=dp), DIMENSION(3) :: rij, rik, rjk
REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_a, set_radius_b, set_radius_c
REAL(KIND=dp), DIMENSION(:, :), POINTER :: sphi_a, sphi_b, sphi_c, tvec, zeta, &
zetb, zetc
REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: iabc
TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER :: basis_set_list_a, basis_set_list_b, &
basis_set_list_c
TYPE(gto_basis_set_type), POINTER :: basis_set_a, basis_set_b, basis_set_c
TYPE(o3c_iterator_type) :: o3c_iterator
CALL timeset(routineN, handle)
CALL get_o3c_container(o3c, nspin=nspin)
! basis sets
CALL get_o3c_container(o3c, basis_set_list_a=basis_set_list_a, &
basis_set_list_b=basis_set_list_b, basis_set_list_c=basis_set_list_c)
nthread = 1
!$ nthread = omp_get_max_threads()
CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED (nthread,o3c_iterator,ncoset,nspin,basis_set_list_a,basis_set_list_b,&
!$OMP basis_set_list_c)&
!$OMP PRIVATE (mepos,ikind,jkind,kkind,basis_set_a,basis_set_b,basis_set_c,rij,rik,rjk,&
!$OMP first_sgfa,la_max,la_min,npgfa,nseta,nsgfa,rpgfa,set_radius_a,sphi_a,zeta,&
!$OMP first_sgfb,lb_max,lb_min,npgfb,nsetb,nsgfb,rpgfb,set_radius_b,sphi_b,zetb,&
!$OMP first_sgfc,lc_max,lc_min,npgfc,nsetc,nsgfc,rpgfc,set_radius_c,sphi_c,zetc,&
!$OMP iset,jset,kset,dij2,dik2,djk2,ni,nj,nk,iabc,tvec,ncoa,ipgfc,startc,endc,&
!$OMP ncob,ncoc,sabc,sabc_contr,sgfa,sgfb,sgfc,egfa,egfb,egfc,f,gccc,vabc,v)
mepos = 0
!$ mepos = omp_get_thread_num()
DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, &
ikind=ikind, jkind=jkind, kkind=kkind, rij=rij, rik=rik, &
integral=iabc, tvec=tvec)
CPASSERT(.NOT. ASSOCIATED(iabc))
CPASSERT(.NOT. ASSOCIATED(tvec))
! basis
basis_set_a => basis_set_list_a(ikind)%gto_basis_set
basis_set_b => basis_set_list_b(jkind)%gto_basis_set
basis_set_c => basis_set_list_c(kkind)%gto_basis_set
! center A
first_sgfa => basis_set_a%first_sgf
la_max => basis_set_a%lmax
la_min => basis_set_a%lmin
npgfa => basis_set_a%npgf
nseta = basis_set_a%nset
nsgfa => basis_set_a%nsgf_set
set_radius_a => basis_set_a%set_radius
sphi_a => basis_set_a%sphi
zeta => basis_set_a%zet
! center B
first_sgfb => basis_set_b%first_sgf
lb_max => basis_set_b%lmax
lb_min => basis_set_b%lmin
npgfb => basis_set_b%npgf
nsetb = basis_set_b%nset
nsgfb => basis_set_b%nsgf_set
set_radius_b => basis_set_b%set_radius
sphi_b => basis_set_b%sphi
zetb => basis_set_b%zet
! center C (RI)
first_sgfc => basis_set_c%first_sgf
lc_max => basis_set_c%lmax
lc_min => basis_set_c%lmin
npgfc => basis_set_c%npgf
nsetc = basis_set_c%nset
nsgfc => basis_set_c%nsgf_set
set_radius_c => basis_set_c%set_radius
sphi_c => basis_set_c%sphi
zetc => basis_set_c%zet
ni = SUM(nsgfa)
nj = SUM(nsgfb)
nk = SUM(nsgfc)
ALLOCATE (iabc(ni, nj, nk))
iabc(:, :, :) = 0.0_dp
ALLOCATE (tvec(nk, nspin))
tvec(:, :) = 0.0_dp
rjk(1:3) = rik(1:3)-rij(1:3)
dij2 = SUM(rij**2)
dik2 = SUM(rik**2)
djk2 = SUM(rjk**2)
DO iset = 1, nseta
DO jset = 1, nsetb
DO kset = 1, nsetc
ncoa = npgfa(iset)*ncoset(la_max(iset))
ncob = npgfb(jset)*ncoset(lb_max(jset))
ncoc = npgfc(kset)*ncoset(lc_max(kset))
sgfa = first_sgfa(1, iset)
sgfb = first_sgfb(1, jset)
sgfc = first_sgfc(1, kset)
egfa = sgfa+nsgfa(iset)-1
egfb = sgfb+nsgfb(jset)-1
egfc = sgfc+nsgfc(kset)-1
IF (ncoa*ncob*ncoc > 0) THEN
! Allocate all the weird work arrays required for coulomb3
ALLOCATE (f(0:la_max(iset)+lb_max(jset)+lc_max(kset)+2))
ALLOCATE (v(ncoa, ncob, ncoc, la_max(iset)+lb_max(jset)+lc_max(kset)+1))
ALLOCATE (gccc(ncoc))
ALLOCATE (vabc(ncoa, ncob))
f = 0.0_dp
v = 0.0_dp
gccc = 0.0_dp
vabc = 0.0_dp
! Hack the radii to make sure coulomb3 does not screen anything out (it's the nl task)
ALLOCATE (rpgfa(npgfa(iset)))
ALLOCATE (rpgfb(npgfb(jset)))
rpgfa = 1.0E10_dp
rpgfb = 1.0E10_dp
rpgfc = 1.0E10_dp

ALLOCATE (sabc(ncoa, ncob, ncoc))
sabc = 0.0_dp

! Because coulomb3 only takes one pgf at a time, need to loop over pgf of c
endc = 0
DO ipgfc = 1, npgfc(kset)
startc = endc+ncoset(lc_min(kset)-1)+1
endc = endc+ncoset(lc_max(kset))
CALL coulomb3(la_max(iset), npgfa(iset), zeta(:, iset), rpgfa, la_min(iset), &
lb_max(jset), npgfb(jset), zetb(:, jset), rpgfb, lb_min(jset), &
lc_max(kset), zetc(ipgfc, kset), rpgfc, lc_min(kset), gccc, &
rij, dij2, rik, dik2, djk2, vabc, &
sabc(:, :, startc:endc), v, f)

END DO

ALLOCATE (sabc_contr(nsgfa(iset), nsgfb(jset), nsgfc(kset)))

CALL abc_contract(sabc_contr, sabc, &
sphi_a(:, sgfa:), sphi_b(:, sgfb:), sphi_c(:, sgfc:), &
ncoa, ncob, ncoc, nsgfa(iset), nsgfb(jset), nsgfc(kset))
iabc(sgfa:egfa, sgfb:egfb, sgfc:egfc) = &
sabc_contr(1:nsgfa(iset), 1:nsgfb(jset), 1:nsgfc(kset))

DEALLOCATE (sabc, sabc_contr, f, v, gccc, vabc, rpgfa, rpgfb)
END IF
END DO !kset
END DO !jset
END DO !iset

CALL set_o3c_container(o3c_iterator, mepos=mepos, integral=iabc, tvec=tvec)

END DO !o3c_iterator
!$OMP END PARALLEL
CALL o3c_iterator_release(o3c_iterator)

CALL timestop(handle)

END SUBROUTINE calculate_o3c_coulomb_integrals

! **************************************************************************************************
!> \brief Contraction of 3-tensor over indices 1 and 2 (assuming symmetry)
!> t(k) = sum_ij (ijk)*p(ij)
Expand Down Expand Up @@ -399,8 +597,9 @@ SUBROUTINE contract3_o3c(o3c, vec, matrix)
CHARACTER(LEN=*), PARAMETER :: routineN = 'contract3_o3c', routineP = moduleN//':'//routineN

INTEGER :: handle, iatom, icol, ik, irow, jatom, &
katom, mepos, nk, nthread
katom, mepos, nk, nthread, s1, s2
LOGICAL :: found, ijsymmetric, trans
REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: work
REAL(KIND=dp), DIMENSION(:), POINTER :: v
REAL(KIND=dp), DIMENSION(:, :), POINTER :: pblock
REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: iabc
Expand All @@ -417,7 +616,7 @@ SUBROUTINE contract3_o3c(o3c, vec, matrix)

!$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED (nthread,o3c_iterator,vec,matrix)&
!$OMP PRIVATE (mepos,iabc,iatom,jatom,katom,irow,icol,trans,pblock,v,found,ik,nk)
!$OMP PRIVATE (mepos,iabc,iatom,jatom,katom,irow,icol,trans,pblock,v,found,ik,nk,work,s1,s2)

mepos = 0
!$ mepos = omp_get_thread_num()
Expand All @@ -439,18 +638,33 @@ SUBROUTINE contract3_o3c(o3c, vec, matrix)
trans = .TRUE.
END IF

CALL dbcsr_get_block_p(matrix=matrix, &
row=irow, col=icol, BLOCK=pblock, found=found)
CALL dbcsr_get_block_p(matrix=matrix, row=irow, col=icol, BLOCK=pblock, found=found)

IF (found) THEN
s1 = SIZE(pblock, 1); s2 = SIZE(pblock, 2)
ALLOCATE (work(s1, s2))
work(:, :) = 0.0_dp

IF (trans) THEN
DO ik = 1, nk
pblock(:, :) = pblock(:, :)+v(ik)*TRANSPOSE(iabc(:, :, ik))
CALL daxpy(s1*s2, v(ik), TRANSPOSE(iabc(:, :, ik)), 1, work(:, :), 1)
END DO
ELSE
DO ik = 1, nk
pblock(:, :) = pblock(:, :)+v(ik)*iabc(:, :, ik)
CALL daxpy(s1*s2, v(ik), iabc(:, :, ik), 1, work(:, :), 1)
END DO
END IF

! Mulitple threads with same irow, icol but different katom (same even in PBCs) can try
! to access the dbcsr block at the same time. Prevent that by CRITICAL section but keep
! computations before hand in order to retain speed

!$OMP CRITICAL
CALL dbcsr_get_block_p(matrix=matrix, row=irow, col=icol, BLOCK=pblock, found=found)
CALL daxpy(s1*s2, 1.0_dp, work(:, :), 1, pblock(:, :), 1)
!$OMP END CRITICAL

DEALLOCATE (work)
END IF

END DO
Expand All @@ -461,6 +675,4 @@ SUBROUTINE contract3_o3c(o3c, vec, matrix)

END SUBROUTINE contract3_o3c

! **************************************************************************************************

END MODULE qs_o3c_methods

0 comments on commit ac40d9b

Please sign in to comment.