From 424586f7d9e6f0922d265889e20fc611edb2d563 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Fri, 22 May 2020 11:40:04 +0200 Subject: [PATCH] RI-HFX: compressed data needs to be updated if tensor is redistributed --- src/hfx_ri.F | 66 ++++++++++++++++++++---------------------------- src/hfx_types.F | 19 +++++++++----- src/qs_tensors.F | 60 +++++++++++++++++++++++++++++++++++-------- 3 files changed, 90 insertions(+), 55 deletions(-) diff --git a/src/hfx_ri.F b/src/hfx_ri.F index 823f3131cf..65c692cc27 100644 --- a/src/hfx_ri.F +++ b/src/hfx_ri.F @@ -43,11 +43,7 @@ MODULE hfx_ri dbcsr_t_pgrid_create, dbcsr_t_pgrid_destroy, dbcsr_t_pgrid_type, & dbcsr_t_reserved_block_indices, dbcsr_t_type USE distribution_2d_types, ONLY: distribution_2d_type - USE hfx_types, ONLY: alloc_containers,& - dealloc_containers,& - hfx_container_type,& - hfx_init_container,& - hfx_ri_type + USE hfx_types, ONLY: hfx_ri_type USE input_constants, ONLY: hfx_ri_do_2c_cholesky,& hfx_ri_do_2c_diag,& hfx_ri_do_2c_iter @@ -548,9 +544,8 @@ SUBROUTINE hfx_ri_pre_scf_Pmat(qs_env, ri_data) CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_Pmat', & routineP = moduleN//':'//routineN - INTEGER :: handle, handle2, i, i_mem, j_mem, & - n_dependent, unit_nr, unit_nr_dbcsr, & - unused + INTEGER :: handle, handle2, i_mem, j_mem, & + n_dependent, unit_nr, unit_nr_dbcsr INTEGER(int_8) :: nflop INTEGER, DIMENSION(2, 1) :: bounds_i INTEGER, DIMENSION(2, 2) :: bounds_j @@ -563,8 +558,6 @@ SUBROUTINE hfx_ri_pre_scf_Pmat(qs_env, ri_data) TYPE(dbcsr_t_type), DIMENSION(1, 1) :: t_3c_int_1 TYPE(dbcsr_type), DIMENSION(1) :: t_2c_int_mat, t_2c_op_pot, t_2c_op_RI, & t_2c_tmp, t_2c_tmp_2 - TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers - TYPE(hfx_container_type), POINTER :: maxval_container CALL timeset(routineN, handle) @@ -657,22 +650,12 @@ SUBROUTINE hfx_ri_pre_scf_Pmat(qs_env, ri_data) CALL timestop(handle2) CALL timeset(routineN//"_copy_2", handle2) - CALL dbcsr_t_copy(t_3c_2, ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.) - - CALL dealloc_containers(ri_data%store_3c(j_mem, i_mem), unused) - CALL alloc_containers(ri_data%store_3c(j_mem, i_mem), 1) - - maxval_container => ri_data%store_3c(j_mem, i_mem)%maxval_container(1) - integral_containers => ri_data%store_3c(j_mem, i_mem)%integral_containers(:, 1) - CALL hfx_init_container(maxval_container, unused, .FALSE.) - DO i = 1, 64 - CALL hfx_init_container(integral_containers(i), unused, .FALSE.) - END DO + CALL dbcsr_t_copy(t_3c_2, ri_data%t_3c_int_ctr_1(j_mem, i_mem), order=[2, 1, 3], move_data=.TRUE.) IF (ALLOCATED(ri_data%blk_indices(j_mem, i_mem)%ind)) DEALLOCATE (ri_data%blk_indices(j_mem, i_mem)%ind) - ALLOCATE (ri_data%blk_indices(j_mem, i_mem)%ind(dbcsr_t_get_num_blocks(ri_data%t_3c_int_ctr_1(1, 1)), 3)) - CALL dbcsr_t_reserved_block_indices(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(j_mem, i_mem)%ind) - CALL compress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage) + ALLOCATE (ri_data%blk_indices(j_mem, i_mem)%ind(dbcsr_t_get_num_blocks(ri_data%t_3c_int_ctr_1(j_mem, i_mem)), 3)) + CALL dbcsr_t_reserved_block_indices(ri_data%t_3c_int_ctr_1(j_mem, i_mem), ri_data%blk_indices(j_mem, i_mem)%ind) + CALL compress_tensor(ri_data%t_3c_int_ctr_1(j_mem, i_mem), ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage) CALL timestop(handle2) ENDDO @@ -1392,16 +1375,25 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, & CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)") ENDIF - IF (ASSOCIATED(ri_data%pgrid_1)) CALL tensor_change_pgrid(ri_data%t_3c_int_ctr_1(1, 1), & - ri_data%pgrid_1, & - starts_array_mc_block_1=ri_data%starts_array_mem_block, & - ends_array_mc_block_1=ri_data%ends_array_mem_block, & - starts_array_mc_block_2=ri_data%starts_array_RI_mem_block, & - ends_array_mc_block_2=ri_data%ends_array_RI_mem_block, & - nodata=.TRUE., & - unit_nr=unit_nr_dbcsr) + n_mem = ri_data%n_mem - IF (ASSOCIATED(ri_data%pgrid_1)) CALL tensor_change_pgrid(ri_data%t_3c_int_ctr_2(1, 1), & + DO i_mem = 1, n_mem + DO j_mem = 1, n_mem + IF (ASSOCIATED(ri_data%pgrid_1)) CALL tensor_change_pgrid(ri_data%t_3c_int_ctr_1(i_mem, j_mem), & + ri_data%pgrid_1, & + starts_array_mc_block_1=ri_data%starts_array_mem_block, & + ends_array_mc_block_1=ri_data%ends_array_mem_block, & + starts_array_mc_block_2=ri_data%starts_array_RI_mem_block, & + ends_array_mc_block_2=ri_data%ends_array_RI_mem_block, & + nodata=.FALSE., & + blk_indices=ri_data%blk_indices(i_mem, j_mem)%ind, & + compressed=ri_data%store_3c(i_mem, j_mem), & + eps=ri_data%filter_eps_storage, & + unit_nr=unit_nr_dbcsr) + ENDDO + ENDDO + + IF (ASSOCIATED(ri_data%pgrid_2)) CALL tensor_change_pgrid(ri_data%t_3c_int_ctr_2(1, 1), & ri_data%pgrid_2, & starts_array_mc_block_2=ri_data%starts_array_RI_mem_block, & ends_array_mc_block_2=ri_data%ends_array_RI_mem_block, & @@ -1429,8 +1421,6 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, & CALL mp_sync(para_env%group) t1 = m_walltime() - n_mem = ri_data%n_mem - flops_ks_max = 0; flops_p_max = 0 DO ispin = 1, nspins @@ -1507,14 +1497,14 @@ SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, & bounds_ij(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)] bounds_ij(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)] - CALL decompress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(i_mem, j_mem)%ind, & + CALL decompress_tensor(ri_data%t_3c_int_ctr_1(i_mem, j_mem), ri_data%blk_indices(i_mem, j_mem)%ind, & ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage) - CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ) + CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(i_mem, j_mem), nze, occ) nze_3c_2 = nze_3c_2 + nze occ_3c_2 = occ_3c_2 + occ CALL timeset(routineN//"_KS", handle2) - CALL dbcsr_t_contract(dbcsr_scalar(1.0_dp), ri_data%t_3c_int_ctr_1(1, 1), t_3c_3, & + CALL dbcsr_t_contract(dbcsr_scalar(1.0_dp), ri_data%t_3c_int_ctr_1(i_mem, j_mem), t_3c_3, & dbcsr_scalar(1.0_dp), ks_t, & contract_1=[1, 2], notcontract_1=[3], & contract_2=[1, 2], notcontract_2=[3], & diff --git a/src/hfx_types.F b/src/hfx_types.F index e41398c2b3..937a6f520c 100644 --- a/src/hfx_types.F +++ b/src/hfx_types.F @@ -38,10 +38,10 @@ MODULE hfx_types USE cp_para_types, ONLY: cp_para_env_type USE cp_units, ONLY: cp_unit_from_cp2k USE dbcsr_tensor_api, ONLY: & - dbcsr_t_batched_contract_finalize, dbcsr_t_default_distvec, dbcsr_t_destroy, & - dbcsr_t_distribution_destroy, dbcsr_t_distribution_new, dbcsr_t_distribution_type, & - dbcsr_t_mp_dims_create, dbcsr_t_pgrid_create, dbcsr_t_pgrid_destroy, dbcsr_t_pgrid_type, & - dbcsr_t_type + dbcsr_t_batched_contract_finalize, dbcsr_t_create, dbcsr_t_default_distvec, & + dbcsr_t_destroy, dbcsr_t_distribution_destroy, dbcsr_t_distribution_new, & + dbcsr_t_distribution_type, dbcsr_t_mp_dims_create, dbcsr_t_pgrid_create, & + dbcsr_t_pgrid_destroy, dbcsr_t_pgrid_type, dbcsr_t_type USE hfx_helpers, ONLY: count_cells_perd,& next_image_cell_perd USE input_constants, ONLY: & @@ -1311,7 +1311,7 @@ SUBROUTINE hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para ri_data%starts_array_RI_mem, ri_data%ends_array_RI_mem, & ri_data%starts_array_RI_mem_block, ri_data%ends_array_RI_mem_block) - ALLOCATE (ri_data%t_3c_int_ctr_1(1, 1)) + ALLOCATE (ri_data%t_3c_int_ctr_1(ri_data%n_mem, ri_data%n_mem)) CALL create_3c_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%dist1_ao_1, ri_data%dist1_ri, ri_data%dist1_ao_2, & ri_data%pgrid, ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, & [1, 2], [3], & @@ -1321,6 +1321,13 @@ SUBROUTINE hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para ends_array_block_2=ri_data%ends_array_RI_mem_block, & name="(AO RI | AO)") + DO i_mem = 1, ri_data%n_mem + DO j_mem = 1, ri_data%n_mem + IF (.NOT. (i_mem == 1 .AND. j_mem == 1)) & + CALL dbcsr_t_create(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_1(i_mem, j_mem)) + ENDDO + ENDDO + ALLOCATE (ri_data%blk_indices(ri_data%n_mem, ri_data%n_mem)) ALLOCATE (ri_data%store_3c(ri_data%n_mem, ri_data%n_mem)) DO i_mem = 1, ri_data%n_mem @@ -1479,9 +1486,9 @@ SUBROUTINE hfx_ri_release(ri_data) DO i_mem = 1, ri_data%n_mem DO j_mem = 1, ri_data%n_mem CALL dealloc_containers(ri_data%store_3c(i_mem, j_mem), unused) + CALL dbcsr_t_destroy(ri_data%t_3c_int_ctr_1(i_mem, j_mem)) ENDDO ENDDO - CALL dbcsr_t_destroy(ri_data%t_3c_int_ctr_1(1, 1)) DEALLOCATE (ri_data%t_3c_int_ctr_1) CALL dbcsr_t_destroy(ri_data%t_3c_int_ctr_2(1, 1)) DEALLOCATE (ri_data%t_3c_int_ctr_2) diff --git a/src/qs_tensors.F b/src/qs_tensors.F index ff305bc761..cb4ef1cf7d 100644 --- a/src/qs_tensors.F +++ b/src/qs_tensors.F @@ -33,12 +33,12 @@ MODULE qs_tensors dbcsr_t_blk_sizes, dbcsr_t_clear, dbcsr_t_copy, dbcsr_t_create, dbcsr_t_default_distvec, & dbcsr_t_destroy, dbcsr_t_distribution_destroy, dbcsr_t_distribution_new, & dbcsr_t_distribution_type, dbcsr_t_filter, dbcsr_t_get_block, dbcsr_t_get_info, & - dbcsr_t_get_mapping_info, dbcsr_t_get_nze_total, dbcsr_t_get_stored_coordinates, & - dbcsr_t_iterator_blocks_left, dbcsr_t_iterator_next_block, dbcsr_t_iterator_start, & - dbcsr_t_iterator_stop, dbcsr_t_iterator_type, dbcsr_t_mp_environ_pgrid, & - dbcsr_t_nblks_total, dbcsr_t_ndims, dbcsr_t_ndims_matrix_column, dbcsr_t_ndims_matrix_row, & - dbcsr_t_pgrid_type, dbcsr_t_put_block, dbcsr_t_reserve_blocks, dbcsr_t_type, & - dbcsr_t_write_split_info + dbcsr_t_get_mapping_info, dbcsr_t_get_num_blocks, dbcsr_t_get_nze_total, & + dbcsr_t_get_stored_coordinates, dbcsr_t_iterator_blocks_left, dbcsr_t_iterator_next_block, & + dbcsr_t_iterator_start, dbcsr_t_iterator_stop, dbcsr_t_iterator_type, & + dbcsr_t_mp_environ_pgrid, dbcsr_t_nblks_total, dbcsr_t_ndims, dbcsr_t_ndims_matrix_column, & + dbcsr_t_ndims_matrix_row, dbcsr_t_pgrid_type, dbcsr_t_put_block, dbcsr_t_reserve_blocks, & + dbcsr_t_reserved_block_indices, dbcsr_t_type, dbcsr_t_write_split_info USE distribution_1d_types, ONLY: distribution_1d_type USE distribution_2d_types, ONLY: distribution_2d_type USE gamma, ONLY: init_md_ftable @@ -49,9 +49,12 @@ MODULE qs_tensors hfx_get_mult_cache_elements,& hfx_get_single_cache_element,& hfx_reset_cache_and_container - USE hfx_types, ONLY: hfx_cache_type,& + USE hfx_types, ONLY: alloc_containers,& + dealloc_containers,& + hfx_cache_type,& hfx_compression_type,& - hfx_container_type + hfx_container_type,& + hfx_init_container USE input_constants, ONLY: do_potential_coulomb,& do_potential_id,& do_potential_short,& @@ -1585,12 +1588,16 @@ SUBROUTINE build_2c_integrals(t2c, filter_eps, qs_env, & !> \param ends_array_mc_block_2 ... !> \param starts_array_mc_block_3 ... !> \param ends_array_mc_block_3 ... +!> \param blk_indices ... +!> \param compressed ... +!> \param eps ... !> \param unit_nr ... ! ************************************************************************************************** SUBROUTINE tensor_change_pgrid(t_3c, pgrid, nodata, & starts_array_mc_block_1, ends_array_mc_block_1, & starts_array_mc_block_2, ends_array_mc_block_2, & starts_array_mc_block_3, ends_array_mc_block_3, & + blk_indices, compressed, eps, & unit_nr) TYPE(dbcsr_t_type), INTENT(INOUT) :: t_3c TYPE(dbcsr_t_pgrid_type), INTENT(IN) :: pgrid @@ -1598,6 +1605,11 @@ SUBROUTINE tensor_change_pgrid(t_3c, pgrid, nodata, & INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL :: starts_array_mc_block_1, & ends_array_mc_block_1, starts_array_mc_block_2, ends_array_mc_block_2, & starts_array_mc_block_3, ends_array_mc_block_3 + INTEGER, ALLOCATABLE, DIMENSION(:, :), & + INTENT(INOUT), OPTIONAL :: blk_indices + TYPE(hfx_compression_type), INTENT(INOUT), & + OPTIONAL :: compressed + REAL(dp), INTENT(IN), OPTIONAL :: eps INTEGER, INTENT(IN), OPTIONAL :: unit_nr CHARACTER(LEN=*), PARAMETER :: routineN = 'tensor_change_pgrid', & @@ -1627,6 +1639,14 @@ SUBROUTINE tensor_change_pgrid(t_3c, pgrid, nodata, & CALL timeset(routineN, handle) + IF (PRESENT(compressed)) THEN + CPASSERT(PRESENT(blk_indices)) + CPASSERT(PRESENT(eps)) + IF (PRESENT(nodata)) THEN + CPASSERT(.NOT. nodata) + ENDIF + ENDIF + mem_aware_1 = PRESENT(starts_array_mc_block_1) .AND. PRESENT(ends_array_mc_block_1) mem_aware_2 = PRESENT(starts_array_mc_block_2) .AND. PRESENT(ends_array_mc_block_2) mem_aware_3 = PRESENT(starts_array_mc_block_3) .AND. PRESENT(ends_array_mc_block_3) @@ -1686,6 +1706,10 @@ SUBROUTINE tensor_change_pgrid(t_3c, pgrid, nodata, & CALL dbcsr_t_create(t_tmp, name, dist, map1, map2, dbcsr_type_real_8, bs1, bs2, bs3) CALL dbcsr_t_distribution_destroy(dist) + IF (PRESENT(compressed)) THEN + CALL decompress_tensor(t_3c, blk_indices, compressed, eps) + ENDIF + IF (PRESENT(nodata)) THEN IF (.NOT. nodata) CALL dbcsr_t_copy(t_3c, t_tmp, move_data=.TRUE.) ELSE @@ -1695,6 +1719,13 @@ SUBROUTINE tensor_change_pgrid(t_3c, pgrid, nodata, & CALL dbcsr_t_destroy(t_3c) t_3c = t_tmp + IF (PRESENT(compressed)) THEN + IF (ALLOCATED(blk_indices)) DEALLOCATE (blk_indices) + ALLOCATE (blk_indices(dbcsr_t_get_num_blocks(t_3c), 3)) + CALL dbcsr_t_reserved_block_indices(t_3c, blk_indices) + CALL compress_tensor(t_3c, compressed, eps) + ENDIF + IF (PRESENT(unit_nr)) THEN IF (unit_nr > 0) THEN WRITE (unit_nr, "(T2,A,1X,A)") "OPTIMIZED PGRID INFO FOR", TRIM(t_3c%name) @@ -1731,12 +1762,19 @@ SUBROUTINE compress_tensor(tensor, compressed, eps) TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers TYPE(hfx_container_type), POINTER :: maxval_container - maxval_cache => compressed%maxval_cache(1) + CALL dealloc_containers(compressed, unused) + CALL alloc_containers(compressed, 1) + maxval_container => compressed%maxval_container(1) - integral_caches => compressed%integral_caches(:, 1) integral_containers => compressed%integral_containers(:, 1) - unused = 0 + CALL hfx_init_container(maxval_container, unused, .FALSE.) + DO i = 1, 64 + CALL hfx_init_container(integral_containers(i), unused, .FALSE.) + END DO + + maxval_cache => compressed%maxval_cache(1) + integral_caches => compressed%integral_caches(:, 1) CALL dbcsr_t_iterator_start(iter, tensor) DO WHILE (dbcsr_t_iterator_blocks_left(iter))