Skip to content

Commit

Permalink
DBM: Add statistics about block multiplications
Browse files Browse the repository at this point in the history
  • Loading branch information
oschuett committed Jun 19, 2022
1 parent 0dcb854 commit 982fd8e
Show file tree
Hide file tree
Showing 12 changed files with 228 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/dbm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ clean:
realclean: clean
rm -fv *.x

CFLAGS := -fopenmp -g -O3 -march=native -Wall -Wextra
CFLAGS := -fopenmp -g -O3 -march=native -Wall -Wextra -Wno-vla-parameter
NVARCH := sm_70
NVFLAGS := -g -O3 -lineinfo -arch $(NVARCH) -Wno-deprecated-gpu-targets -Xcompiler "$(CFLAGS)" -D__OFFLOAD_CUDA

Expand Down
2 changes: 1 addition & 1 deletion src/dbm/PACKAGE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description": "Distributed Block-sparse Matrix",
"requires": ["../base", "../mpiwrap", "../offload"],
"requires": ["../base", "../mpiwrap", "../offload", "../common"],
"public": ["dbm_api.F"],
}
55 changes: 53 additions & 2 deletions src/dbm/dbm_api.F
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
!--------------------------------------------------------------------------------------------------!

MODULE dbm_api
USE ISO_C_BINDING, ONLY: &
C_ASSOCIATED, C_BOOL, C_CHAR, C_DOUBLE, C_F_POINTER, C_INT, C_INT64_T, C_NULL_CHAR, C_NULL_PTR, C_PTR
USE ISO_C_BINDING, ONLY: C_ASSOCIATED, C_BOOL, C_CHAR, C_DOUBLE, C_F_POINTER, C_FUNLOC, C_FUNPTR, &
C_INT, C_INT64_T, C_NULL_CHAR, C_NULL_PTR, C_PTR
USE kinds, ONLY: default_string_length, &
dp, &
int_8
USE message_passing, ONLY: mp_cart_rank, &
mp_environ
USE string_utilities, ONLY: strlcpy_c2f

! Uncomment the following line to enable validation.
!#define DBM_VALIDATE_AGAINST_DBCSR
Expand Down Expand Up @@ -117,6 +118,7 @@ MODULE dbm_api

PUBLIC :: dbm_library_init
PUBLIC :: dbm_library_finalize
PUBLIC :: dbm_library_print_stats

TYPE dbm_distribution_obj
PRIVATE
Expand Down Expand Up @@ -1500,4 +1502,53 @@ END SUBROUTINE dbm_library_finalize_c
END SUBROUTINE dbm_library_finalize
! **************************************************************************************************
!> \brief Print DBM library statistics
!> \param mpi_comm ...
!> \param output_unit ...
!> \author Ole Schuett
! **************************************************************************************************
SUBROUTINE dbm_library_print_stats(mpi_comm, output_unit)
INTEGER, INTENT(IN) :: mpi_comm, output_unit
INTERFACE
SUBROUTINE dbm_library_print_stats_c(mpi_comm, print_func, output_unit) &
BIND(C, name="dbm_library_print_stats")
IMPORT :: C_FUNPTR, C_INT
INTEGER(KIND=C_INT), VALUE :: mpi_comm
TYPE(C_FUNPTR), VALUE :: print_func
INTEGER(KIND=C_INT), VALUE :: output_unit
END SUBROUTINE dbm_library_print_stats_c
END INTERFACE
! Since Fortran units groups can't be used from C, we pass a function pointer instead.
CALL dbm_library_print_stats_c(mpi_comm=mpi_comm, &
print_func=C_FUNLOC(print_func), &
output_unit=output_unit)

END SUBROUTINE dbm_library_print_stats

! **************************************************************************************************
!> \brief Callback to write to a Fortran output unit.
!> \param message ...
!> \param output_unit ...
!> \author Ole Schuett
! **************************************************************************************************
SUBROUTINE print_func(message, output_unit) BIND(C, name="dbm_api_print_func")
CHARACTER(LEN=1, KIND=C_CHAR), INTENT(IN) :: message(*)
INTEGER(KIND=C_INT), INTENT(IN), VALUE :: output_unit

CHARACTER(LEN=1000) :: buffer
INTEGER :: nchars

IF (output_unit <= 0) &
RETURN

! Convert C char array into Fortran string.
nchars = strlcpy_c2f(buffer, message)

! Print the message.
WRITE (output_unit, FMT="(A)", ADVANCE="NO") buffer(1:nchars)
END SUBROUTINE print_func

END MODULE dbm_api
129 changes: 128 additions & 1 deletion src/dbm/dbm_library.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,17 @@
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dbm_library.h"
#include "dbm_mempool.h"
#include "dbm_mpi.h"

#define DBM_NUM_COUNTERS 64

static int64_t **per_thread_counters = NULL;
static bool library_initialized = false;
static int max_threads = 0;

#if !defined(_OPENMP)
#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
Expand All @@ -30,7 +36,19 @@ void dbm_library_init(void) {
abort();
}

// Nothing to do, yet.
max_threads = omp_get_max_threads();
per_thread_counters = malloc(DBM_NUM_COUNTERS * sizeof(int64_t *));

// Using parallel regions to ensure memory is allocated near a thread's core.
#pragma omp parallel default(none) shared(per_thread_counters) \
num_threads(max_threads)
{
const int ithread = omp_get_thread_num();
const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
per_thread_counters[ithread] = malloc(counters_size);
memset(per_thread_counters[ithread], 0, counters_size);
}

library_initialized = true;
}

Expand All @@ -44,8 +62,117 @@ void dbm_library_finalize(void) {
abort();
}

for (int i = 0; i < max_threads; i++) {
free(per_thread_counters[i]);
}
free(per_thread_counters);
per_thread_counters = NULL;

dbm_mempool_clear();
library_initialized = false;
}

/*******************************************************************************
* \brief Computes min(3, floor(log10(x))).
* \author Ole Schuett
******************************************************************************/
static int floorlog10(const int x) {
if (x >= 1000)
return 3;
if (x >= 100)
return 2;
if (x >= 10)
return 1;
return 0;
}

/*******************************************************************************
* \brief Add given block multiplication to the stats.
* \author Ole Schuett
******************************************************************************/
void dbm_library_counter_increment(const int m, const int n, const int k) {
const int ithread = omp_get_thread_num();
assert(ithread < max_threads);
const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
per_thread_counters[ithread][idx]++;
}

/*******************************************************************************
* \brief Comperator passed to qsort to compare two counters.
* \author Ole Schuett
******************************************************************************/
static int compare_counters(const void *a, const void *b) {
return *(int64_t *)b - *(int64_t *)a;
}

/*******************************************************************************
* \brief Prints statistics gathered by the DBM library.
* \author Ole Schuett
******************************************************************************/
void dbm_library_print_stats(const int fortran_comm,
void (*print_func)(char *, int),
const int output_unit) {
if (!library_initialized) {
fprintf(stderr, "Error: DBM library is not initialized.\n");
abort();
}

const dbm_mpi_comm_t comm = dbm_mpi_comm_f2c(fortran_comm);
// Sum all counters across threads and mpi ranks.
int64_t counters[DBM_NUM_COUNTERS][2];
memset(counters, 0, DBM_NUM_COUNTERS * 2 * sizeof(int64_t));
double total = 0.0;
for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
counters[i][1] = i; // needed as inverse index after qsort
for (int j = 0; j < max_threads; j++) {
counters[i][0] += per_thread_counters[j][i];
}
dbm_mpi_sum_int64(&counters[i][0], 1, comm);
total += counters[i][0];
}

// Sort counters.
qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);

// Print counters.
print_func("\n", output_unit);
print_func(" ----------------------------------------------------------------"
"---------------\n",
output_unit);
print_func(" - "
" -\n",
output_unit);
print_func(" - DBM STATISTICS "
" -\n",
output_unit);
print_func(" - "
" -\n",
output_unit);
print_func(" ----------------------------------------------------------------"
"---------------\n",
output_unit);
print_func(" M x N x K "
"COUNT PERCENT\n",
output_unit);

const char *labels[] = {"?", "??", "???", ">999"};
for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
if (counters[i][0] == 0)
continue; // skip empty counters
const double percent = 100.0 * counters[i][0] / total;
const int idx = counters[i][1];
const int m = (idx % 64) / 16;
const int n = (idx % 16) / 4;
const int k = (idx % 4) / 1;
char buffer[100];
snprintf(buffer, sizeof(buffer), " %4s x %4s x %4s %46li %10.2f%%\n",
labels[m], labels[n], labels[k], counters[i][0], percent);
print_func(buffer, output_unit);
}

print_func(" ----------------------------------------------------------------"
"---------------\n",
output_unit);
}

// EOF
14 changes: 14 additions & 0 deletions src/dbm/dbm_library.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@ void dbm_library_init(void);
******************************************************************************/
void dbm_library_finalize(void);

/*******************************************************************************
* \brief Add given block multiplication to the stats.
* \author Ole Schuett
******************************************************************************/
void dbm_library_counter_increment(const int m, const int n, const int k);

/*******************************************************************************
* \brief Prints statistics gathered by the DBM library.
* \author Ole Schuett
******************************************************************************/
void dbm_library_print_stats(const int fortran_comm,
void (*print_func)(char *, int),
const int output_unit);

#endif

// EOF
10 changes: 10 additions & 0 deletions src/dbm/dbm_miniapp.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@
#include "dbm_matrix.h"
#include "dbm_mpi.h"

/*******************************************************************************
* \brief Wrapper for printf, passed to dbm_library_print_stats.
* \author Ole Schuett
******************************************************************************/
static void print_func(char *message, int output_unit) {
(void)output_unit; // mark used
printf("%s", message);
}

/*******************************************************************************
* \brief Private routine for creating a distribution and an empty matrix.
* \author Ole Schuett
Expand Down Expand Up @@ -173,6 +182,7 @@ int main(int argc, char *argv[]) {
printf("done :-)\n");
}

dbm_library_print_stats(dbm_mpi_comm_c2f(comm), &print_func, 0);
dbm_library_finalize();
dbm_mpi_comm_free(&comm);
dbm_mpi_finalize();
Expand Down
2 changes: 2 additions & 0 deletions src/dbm/dbm_multiply.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ static void multiply_packs(const bool transa, const bool transb,
const float alpha2 = alpha * alpha;

int64_t flop_sum = 0;

#pragma omp parallel for schedule(dynamic) reduction(+ : flop_sum)
for (int kshard = 0; kshard < matrix_c->nshards; kshard++) {
dbm_shard_t *shard_c = &matrix_c->shards[kshard];
Expand Down Expand Up @@ -222,6 +223,7 @@ static void multiply_packs(const bool transa, const bool transb,
// Count flops.
assert(m * n * k > 0);
flop_sum += 2 * m * n * k;
dbm_library_counter_increment(m, n, k);

// Invalidate norm of C block because its data is going to change.
blk_c->norm = -1.0;
Expand Down
4 changes: 3 additions & 1 deletion src/dbt/dbt_unittest.F
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ PROGRAM dbt_unittest
USE dbcsr_api, ONLY: dbcsr_finalize_lib,&
dbcsr_init_lib
USE dbm_api, ONLY: dbm_library_finalize,&
dbm_library_init
dbm_library_init,&
dbm_library_print_stats
USE dbt_test, ONLY: dbt_contract_test,&
dbt_reset_randmat_seed,&
dbt_setup_test_tensor,&
Expand Down Expand Up @@ -781,6 +782,7 @@ PROGRAM dbt_unittest
! End tests !
!--------------------------------------------------------------------------------------------------!

CALL dbm_library_print_stats(mp_comm, io_unit)
CALL dbm_library_finalize()
CALL dbcsr_finalize_lib() ! Needed for DBM_VALIDATE_AGAINST_DBCSR.

Expand Down
4 changes: 3 additions & 1 deletion src/dbt/tas/dbt_tas_unittest.F
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ PROGRAM dbt_tas_unittest
dbcsr_init_lib
USE dbm_api, ONLY: dbm_get_name,&
dbm_library_finalize,&
dbm_library_init
dbm_library_init,&
dbm_library_print_stats
USE dbt_tas_base, ONLY: dbt_tas_create,&
dbt_tas_destroy,&
dbt_tas_info,&
Expand Down Expand Up @@ -161,6 +162,7 @@ PROGRAM dbt_tas_unittest
CALL mp_comm_free(mp_comm_C)
CALL mp_comm_free(mp_comm_Ct)

CALL dbm_library_print_stats(mp_comm, io_unit)
CALL dbm_library_finalize()
CALL dbcsr_finalize_lib() ! Needed for DBM_VALIDATE_AGAINST_DBCSR.
CALL mp_world_finalize()
Expand Down
12 changes: 10 additions & 2 deletions src/grid/grid_unittest.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,20 @@
#include "common/grid_library.h"
#include "grid_replay.h"

void mpi_sum_func(long *number, int mpi_comm) {
/*******************************************************************************
* \brief Standin for mpi_sum, passed to grid_library_print_stats.
* \author Ole Schuett
******************************************************************************/
static void mpi_sum_func(long *number, int mpi_comm) {
*number += 0; // Nothing todo without MPI, pretend arguments are used anyways.
mpi_comm += 0;
}

void print_func(char *message, int output_unit) {
/*******************************************************************************
* \brief Wrapper for printf, passed to grid_library_print_stats.
* \author Ole Schuett
******************************************************************************/
static void print_func(char *message, int output_unit) {
output_unit += 0; // Pretend argument is used.
printf("%s", message);
}
Expand Down
1 change: 1 addition & 0 deletions src/start/PACKAGE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"../common",
"../input",
"../grid",
"../dbm",
"../pw",
"../fm",
"../tmc",
Expand Down
2 changes: 2 additions & 0 deletions src/start/cp2k_runs.F
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ MODULE cp2k_runs
dbcsr_init_lib,&
dbcsr_print_config,&
dbcsr_print_statistics
USE dbm_api, ONLY: dbm_library_print_stats
USE environment, ONLY: cp2k_finalize,&
cp2k_init,&
cp2k_read,&
Expand Down Expand Up @@ -423,6 +424,7 @@ RECURSIVE SUBROUTINE cp2k_run(input_declaration, input_file_name, output_unit, m

CALL dbcsr_print_statistics()

CALL dbm_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
CALL grid_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)

m_memory_max_mpi = m_memory_max
Expand Down

0 comments on commit 982fd8e

Please sign in to comment.