Skip to content

Commit

Permalink
Exclude transpose for B matrix for the opt rect algo
Browse files Browse the repository at this point in the history
  • Loading branch information
alazzaro committed Oct 5, 2018
1 parent 334ef70 commit 1d923e0
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 27 deletions.
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,16 @@ OTHER_HELP += "install : Install the library and modules under PREFIX=<directory
# Run the test suite: first every program listed in $(UNITTESTS), then the
# performance driver once per input listed in $(PERFTESTS). Each run is started
# through mpirun with $(NPROCS) processes and OMP_NUM_THREADS=2, preceded by a
# banner naming the test. The first failing run aborts the recipe (exit 1).
test:
@export OMP_NUM_THREADS=2 ; \
for test in $(UNITTESTS); do \
echo "****" ; \
echo "**** Unit test: $$test" ; \
echo "****" ; \
mpirun -np $(NPROCS) $(BINDIR)/$$test.x || exit 1; \
done
@export OMP_NUM_THREADS=2 ; \
for input in $(PERFTESTS); do \
echo "****" ; \
echo "Test: $$input" ; \
echo "****" ; \
mpirun -np $(NPROCS) $(BINDIR)/dbcsr_performance_driver.x $$input || exit 1; \
done

Expand Down
55 changes: 28 additions & 27 deletions src/mm/dbcsr_mm.F
Original file line number Diff line number Diff line change
Expand Up @@ -498,31 +498,6 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, &
!
keep_sparsity = .FALSE.
IF (PRESENT(retain_sparsity)) keep_sparsity = retain_sparsity
!
! Introducing an optimized algorithm for a 1D grid,
! where all ranks are organized over row processors.
! The left matrix must be distributed over row-processors,
! i.e. it is transposed.
! This algorithm is particularly beneficial for rectangular
! matrix multiplications, where K>>{M, N}, and it is a variant
! of the CARMA algorithm.
!
!    --------       -----
!   |        |  x  |     |
!    --------      |     |
!                  |     |
!                   -----
!
use_rect_algo = .FALSE.
IF (nprows .EQ. numnodes .AND. npcols .EQ. 1 .AND. &
transa .EQ. dbcsr_transpose .AND. &
(.NOT. product_reindex) .AND. &
(.NOT. keep_sparsity) .AND. &
(.NOT. dbcsr_cfg%use_mpi_rma) .AND. &
dbcsr_nfullrows_total(matrix_a) .GT. dbcsr_nfullrows_total(matrix_c) .AND. &
dbcsr_nfullrows_total(matrix_a) .GT. dbcsr_nfullcols_total(matrix_c)) THEN
use_rect_algo = .TRUE.
ENDIF

! check parameters ---------------------------------------------------------
transa_l = transa
Expand Down Expand Up @@ -554,6 +529,32 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, &
CALL dbcsr_print(matrix_c, nodata=.TRUE.)
ENDIF
ENDIF
!
! Introducing an optimized algorithm for a 1D grid,
! where all ranks are organized over row processors.
! The left matrix must be distributed over row-processors,
! i.e. it is transposed.
! This algorithm is particularly beneficial for rectangular
! matrix multiplications, where K>>{M, N}, and it is a variant
! of the CARMA algorithm.
!
!    --------       -----
!   |        |  x  |     |
!    --------      |     |
!                  |     |
!                   -----
!
use_rect_algo = .FALSE.
IF (nprows .EQ. numnodes .AND. npcols .EQ. 1 .AND. &
transa_l .EQ. dbcsr_transpose .AND. &
transb_l .EQ. dbcsr_no_transpose .AND. &
(.NOT. product_reindex) .AND. &
(.NOT. keep_sparsity) .AND. &
(.NOT. dbcsr_cfg%use_mpi_rma) .AND. &
dbcsr_nfullrows_total(matrix_a) .GT. dbcsr_nfullrows_total(matrix_c) .AND. &
dbcsr_nfullrows_total(matrix_a) .GT. dbcsr_nfullcols_total(matrix_c)) THEN
use_rect_algo = .TRUE.
ENDIF

! transpose/conjg left and/or right matrices if needed
SELECT CASE (transa_l)
Expand Down Expand Up @@ -1009,8 +1010,8 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, &
IF (use_rect_algo) THEN
matrix_c_local = dbcsr_type()
IF (use_dense_mult) THEN
ALLOCATE (dist_rows(matrix_a%nblkcols_total), &
dist_cols(matrix_b%nblkcols_total))
ALLOCATE (dist_rows(matrix_left%nblkrows_total), &
dist_cols(matrix_right%nblkcols_total))
dist_rows(:) = mynode
dist_cols(:) = 0
CALL dbcsr_distribution_new(local_distribution, mp_obj, &
Expand Down

0 comments on commit 1d923e0

Please sign in to comment.