Skip to content

Commit

Permalink
Merge pull request #200 from bosilca/topic/allgatherv_start_end_times…
Browse files Browse the repository at this point in the history
…tamps

Add support for the MPIX_HARMONIZE extension.
  • Loading branch information
gvallee committed Feb 1, 2024
2 parents bf90a1a + bf03334 commit f70987e
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 77 deletions.
46 changes: 26 additions & 20 deletions src/allgatherv/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,35 +15,41 @@ all: liballgatherv.so \
liballgatherv_comparebuffcontent.so \
liballgatherv_late_arrival.so

# Profiling-library variants of the MPI_Allgatherv wrapper.
#
# Every library is built from the same mpi_allgatherv.c; the -DENABLE_* define
# on each recipe selects which kind of data the library records. $(CFLAGS) and
# $(LDFLAGS) carry the optional MPIX_HARMONIZE settings (see check-env).
#
# NOTE(review): check-env is listed as a normal prerequisite. If no file named
# check-env exists and the target has no recipe, GNU make considers it updated
# on every run, so each library below is relinked on every invocation --
# confirm this is intended.

# Records the displacements passed to MPI_Allgatherv.
liballgatherv_displs.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_displs.o ../common/logger_displs.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ $(CFLAGS) -g -shared -Wall -fPIC -DENABLE_DISPLS=1 ../common/logger_for_displs.o ${COMMON_OBJECTS} ../common/timings.o ../common/logger_displs.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_displs.so $(LDFLAGS)

# Records the counts; this rule also produces the non-compact variant
# (liballgatherv_counts_notcompact.so) as a side product.
liballgatherv_counts.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/logger_for_counts.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ -g $(CFLAGS) -shared -Wall -fPIC -DENABLE_RAW_DATA=1 -DENABLE_COUNTS=1 ../common/logger_for_counts.o ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_counts.so $(LDFLAGS)
	mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_COMPACT_FORMAT=0 -DENABLE_COUNTS=1 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_counts_notcompact.so $(LDFLAGS)

# Records execution timings.
liballgatherv_exec_timings.so: check-env ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_exec_timings.so $(LDFLAGS)

# Records late-arrival timings.
liballgatherv_late_arrival.so: check-env ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_late_arrival.so $(LDFLAGS)

# Built with backtrace support enabled.
liballgatherv_backtrace.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_backtrace.so $(LDFLAGS)

# Built with location tracking enabled.
liballgatherv_location.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_location.so $(LDFLAGS)

# Data validation, "save" side; links against OpenSSL (-lssl -lcrypto).
liballgatherv_savebuffcontent.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_SAVE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_savebuffcontent.so -lssl -lcrypto $(LDFLAGS)

# Data validation, "compare" side; links against OpenSSL (-lssl -lcrypto).
liballgatherv_comparebuffcontent.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_COMPARE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_comparebuffcontent.so -lssl -lcrypto $(LDFLAGS)

# Default library, built without any -DENABLE_* define.
liballgatherv.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h
	mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv.so -lssl -lcrypto $(LDFLAGS)

# check-env has no recipe of its own. The conditional below is evaluated while
# the makefile is read, not when the target is built: if MPIX_HARMONIZE_PREFIX
# is defined, the MPIX_HARMONIZE compile and link flags are appended to CFLAGS
# and LDFLAGS. Recipes expand $(CFLAGS)/$(LDFLAGS) only when they run, so it
# does not matter that these assignments appear after the rules using them.
check-env:
ifdef MPIX_HARMONIZE_PREFIX
CFLAGS+=-DHAVE_MPIX_HARMONIZE=1 -I$(MPIX_HARMONIZE_PREFIX)/include
# The rpath is passed as a single -Wl, argument so the directory cannot be
# separated from the -rpath option on the link line.
LDFLAGS+=-L$(MPIX_HARMONIZE_PREFIX)/lib64 -Wl,-rpath,$(MPIX_HARMONIZE_PREFIX)/lib64 -lmpix-harmonize -lmpits
endif

# check and clean do not produce files of the same name.
.PHONY: check clean

# "make check" only builds everything.
check: all

# Remove every built library and object file in this directory.
clean:
	@rm -f *.so *.o
49 changes: 44 additions & 5 deletions src/allgatherv/mpi_allgatherv.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*************************************************************************
* Copyright (c) 2019-2010, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
Expand Down Expand Up @@ -53,6 +53,16 @@ double *late_arrival_timings = NULL;

static logger_t *logger = NULL;

/*
 * Optional clock-harmonization support, compiled in only when the build
 * defines HAVE_MPIX_HARMONIZE.
 */
#if defined(HAVE_MPIX_HARMONIZE)
#include <mpix_harmonize.h>

/* The frequency of re-harmonization, counting MPI_Allgatherv on MPI_COMM_WORLD */
#define TRAMPOLINE_FREQUENCY 50

/* Passed by address as the second argument of every MPIX_Harmonize() call in
 * this file. NOTE(review): its value is never read here; presumably it is an
 * output flag of the harmonization -- confirm against mpix_harmonize.h. */
static int _trampoline_flag = 0;
/* Number of MPI_Allgatherv calls made on MPI_COMM_WORLD so far. */
static int _trampoline_iterations = 0;
#endif /* defined(HAVE_MPIX_HARMONIZE) */

#if ENABLE_EXEC_TIMING
double timestamps_start[500];
double timestamps_end[500];
Expand Down Expand Up @@ -981,12 +991,26 @@ int MPI_Finalize()

/*
 * Interposed MPI_Init_thread().
 *
 * Forwards to _mpi_init_thread() and, when built with HAVE_MPIX_HARMONIZE and
 * the initialization succeeded, harmonizes the clocks across MPI_COMM_WORLD.
 *
 * Returns the code from _mpi_init_thread() if it failed (or if harmonization
 * is not compiled in), otherwise the code returned by MPIX_Harmonize().
 */
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
{
    int rc = _mpi_init_thread(argc, argv, required, provided);
#if defined(HAVE_MPIX_HARMONIZE)
    if( MPI_SUCCESS == rc ) {
        /* harmonize the clocks across all ranks in MPI_COMM_WORLD */
        rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag);
    }
#endif /* defined(HAVE_MPIX_HARMONIZE) */
    return rc;
}

/*
 * Interposed MPI_Init().
 *
 * Forwards to _mpi_init() and, when built with HAVE_MPIX_HARMONIZE and the
 * initialization succeeded, harmonizes the clocks across MPI_COMM_WORLD.
 *
 * Returns the code from _mpi_init() if it failed (or if harmonization is not
 * compiled in), otherwise the code returned by MPIX_Harmonize().
 */
int MPI_Init(int *argc, char ***argv)
{
    int rc = _mpi_init(argc, argv);
#if defined(HAVE_MPIX_HARMONIZE)
    if( MPI_SUCCESS == rc ) {
        /* harmonize the clocks across all ranks in MPI_COMM_WORLD */
        rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag);
    }
#endif /* defined(HAVE_MPIX_HARMONIZE) */
    return rc;
}

int mpi_init_thread_(MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr)
Expand Down Expand Up @@ -1642,6 +1666,20 @@ int MPI_Allgatherv(const void *sendbuf, const int sendcount, MPI_Datatype sendty
void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype,
MPI_Comm comm)
{
#if defined(HAVE_MPIX_HARMONIZE)
    /* From time to time we need to resynchronize the clocks, but we can only do it on MPI_Allgatherv on
     * MPI_COMM_WORLD.
     */
    if( MPI_COMM_WORLD == comm ) {
        _trampoline_iterations++;
        if( _trampoline_iterations >= TRAMPOLINE_FREQUENCY ) {
            /* Restart the count on every re-harmonization: the cadence (every
             * TRAMPOLINE_FREQUENCY-th call) is unchanged, but the signed counter
             * can no longer overflow in a long-running application. */
            _trampoline_iterations = 0;
            int rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag);
            if( MPI_SUCCESS != rc ) {
                /* Harmonization failed: abort the whole job. */
                MPI_Abort(MPI_COMM_WORLD, -1);
            }
        }
    }
#endif /* defined(HAVE_MPIX_HARMONIZE) */
return _mpi_allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm);
}

Expand Down Expand Up @@ -1676,6 +1714,7 @@ void mpi_allgatherv_(void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype,
// if the app never calls MPI_Finalize().
/*
 * Destructor hook: commits the collected data and finalizes profiling, unless
 * there is no logger.
 *
 * NOTE(review): the guard presumably relies on logger being NULL once
 * profiling has already been finalized (e.g. via MPI_Finalize) -- confirm.
 */
__attribute__((destructor)) void calledLast()
{
    if( NULL == logger ) return; /* nothing more to do, already done */
    _commit_data();
    _finalize_profiling();
}

0 comments on commit f70987e

Please sign in to comment.