Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VPICIO bugfix #196

Merged
merged 6 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 23 additions & 11 deletions src/api/pdc_region/pdc_region_transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,7 @@ register_metadata(pdc_transfer_request_start_all_pkg **transfer_request_input, i
* sorted in terms of data_server_id. We pack data from user buffer to contiguous buffers. Static partitioning
* requires having at most n_data_servers number of contiguous regions.
*/
static int
static perr_t
prepare_start_all_requests(pdcid_t *transfer_request_id, int size,
pdc_transfer_request_start_all_pkg ***write_transfer_request_ptr,
pdc_transfer_request_start_all_pkg ***read_transfer_request_ptr,
Expand Down Expand Up @@ -934,7 +934,7 @@ prepare_start_all_requests(pdcid_t *transfer_request_id, int size,
printf("PDC Client PDCregion_transfer_start_all attempt to start existing transfer request @ "
"line %d\n",
__LINE__);
return 1;
return FAIL;
}
if (transfer_request->consistency == PDC_CONSISTENCY_POSIX) {
posix_transfer_request_id_ptr[0][posix_size_ptr[0]] = transfer_request_id[i];
Expand All @@ -959,6 +959,11 @@ prepare_start_all_requests(pdcid_t *transfer_request_id, int size,
&(transfer_request->n_obj_servers), &(transfer_request->obj_servers),
&(transfer_request->sub_offsets), &(transfer_request->output_offsets),
&(transfer_request->output_sizes), &(transfer_request->output_buf));
if (transfer_request->n_obj_servers == 0) {
printf("PDC_Client %d, %s: error with static region partition, no server is selected!\n",
pdc_client_mpi_rank_g, __func__);
return FAIL;
}
for (j = 0; j < transfer_request->n_obj_servers; ++j) {
request_pkgs =
(pdc_transfer_request_start_all_pkg *)malloc(sizeof(pdc_transfer_request_start_all_pkg));
Expand Down Expand Up @@ -1082,7 +1087,7 @@ prepare_start_all_requests(pdcid_t *transfer_request_id, int size,
else {
*read_size_ptr = 0;
}
return 0;
return SUCCEED;
}

static int
Expand Down Expand Up @@ -1321,8 +1326,9 @@ PDCregion_transfer_start_all(pdcid_t *transfer_request_id, int size)
FUNC_ENTER(NULL);
// Split write and read requests. Handle them separately.
// printf("PDCregion_transfer_start_all: checkpoint %d\n", __LINE__);
prepare_start_all_requests(transfer_request_id, size, &write_transfer_requests, &read_transfer_requests,
&write_size, &read_size, &posix_transfer_request_id, &posix_size);
ret_value = prepare_start_all_requests(transfer_request_id, size, &write_transfer_requests,
&read_transfer_requests, &write_size, &read_size,
&posix_transfer_request_id, &posix_size);
/*
printf("PDCregion_transfer_start_all: checkpoint %d, write_size = %d, read_size = %d\n", __LINE__,
write_size, read_size);
Expand Down Expand Up @@ -1454,12 +1460,18 @@ PDCregion_transfer_start(pdcid_t transfer_request_id)

if (transfer_request->region_partition == PDC_REGION_STATIC) {
// Identify which part of the region is going to which data server.
static_region_partition(transfer_request->new_buf, transfer_request->remote_region_ndim, unit,
transfer_request->access_type, transfer_request->obj_dims,
transfer_request->remote_region_offset, transfer_request->remote_region_size,
1, &(transfer_request->n_obj_servers), &(transfer_request->obj_servers),
&(transfer_request->sub_offsets), &(transfer_request->output_offsets),
&(transfer_request->output_sizes), &(transfer_request->output_buf));
ret_value = static_region_partition(
transfer_request->new_buf, transfer_request->remote_region_ndim, unit,
transfer_request->access_type, transfer_request->obj_dims, transfer_request->remote_region_offset,
transfer_request->remote_region_size, 1, &(transfer_request->n_obj_servers),
&(transfer_request->obj_servers), &(transfer_request->sub_offsets),
&(transfer_request->output_offsets), &(transfer_request->output_sizes),
&(transfer_request->output_buf));
if (transfer_request->n_obj_servers == 0) {
printf("PDC_Client %d, %s: error with static region partition, no server is selected!\n",
pdc_client_mpi_rank_g, __func__);
return FAIL;
}
/*
printf("n_obj_servers = %d\n", transfer_request->n_obj_servers);
for ( i = 0; i < transfer_request->n_obj_servers; ++i ) {
Expand Down
3 changes: 3 additions & 0 deletions src/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ set(SCRIPTS
run_test.sh
mpi_test.sh
run_multiple_test.sh
run_multiple_mpi_test.sh
run_checkpoint_restart_test.sh
)

Expand Down Expand Up @@ -487,6 +488,7 @@ if(BUILD_MPI_TESTING)
add_test(NAME obj_info_mpi WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND mpi_test.sh ./obj_info ${MPI_RUN_CMD} 4 6 )
add_test(NAME obj_put_data_mpi WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND mpi_test.sh ./obj_put_data ${MPI_RUN_CMD} 4 6 )
add_test(NAME obj_get_data_mpi WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND mpi_test.sh ./obj_get_data ${MPI_RUN_CMD} 4 6 )
add_test(NAME vpicio_bdcats_mpi WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_multiple_mpi_test.sh ${MPI_RUN_CMD} 4 6 ./vpicio ./bdcats)

set_tests_properties(read_obj_shared_int PROPERTIES LABELS "parallel;parallel_obj" )
set_tests_properties(read_obj_shared_float PROPERTIES LABELS "parallel;parallel_obj" )
Expand Down Expand Up @@ -514,6 +516,7 @@ if(BUILD_MPI_TESTING)
# set_tests_properties(region_transfer_2D_skewed_mpi PROPERTIES LABELS "parallel;parallel_region_transfer" )
# set_tests_properties(region_transfer_3D_skewed_mpi PROPERTIES LABELS "parallel;parallel_region_transfer" )
set_tests_properties(region_transfer_write_read_mpi PROPERTIES LABELS "parallel;parallel_region_transfer" )
set_tests_properties(vpicio_bdcats_mpi PROPERTIES LABELS "parallel;parallel_region_transfer" )
set_tests_properties(region_transfer_all_mpi PROPERTIES LABELS "parallel;parallel_region_transfer_all" )
set_tests_properties(region_transfer_all_2D_mpi PROPERTIES LABELS "parallel;parallel_region_transfer_all" )
set_tests_properties(region_transfer_all_3D_mpi PROPERTIES LABELS "parallel;parallel_region_transfer_all" )
Expand Down
52 changes: 52 additions & 0 deletions src/tests/run_multiple_mpi_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/bash
# Run one or more MPI test executables, in order, against a single PDC server.
#
# Usage: run_multiple_mpi_test.sh <mpi_cmd> <n_servers> <n_clients> <test_exe>...
#
#   mpi_cmd    MPI launcher (e.g. mpiexec or srun)
#   n_servers  number of ranks used to launch ./pdc_server.exe
#   n_clients  number of ranks used to launch each test executable
#   test_exe   one or more test executables, run sequentially
#
# Exit status: 0 if every test succeeded, otherwise the status of the first
# test that failed. 255 on bad usage, 254 if a test executable is missing.
#
# We assume that if the library build has enabled MPI, LD_LIBRARY_PATH is
# defined and points to the MPI libraries used by the linker (e.g. -L<path> -lmpi)

extra_cmd=""

if [[ "$SUPERCOMPUTER" == "perlmutter" ]]; then
    extra_cmd="--mem=25600 --cpu_bind=cores --overlap"
fi

# The launcher, both rank counts and at least one test are all required.
if [ $# -lt 4 ]; then
    echo "missing test argument"
    exit 255
fi

mpi_cmd="$1"
n_servers="$2"
n_client="$3"
# copy the remaining arguments: the list of test executables
all_test=("${@:4}")
echo "${all_test[@]}"

rm -rf pdc_tmp pdc_data

# Validate every test BEFORE launching the server, so that a bad argument
# cannot leave a background server running.
for test_exe in "${all_test[@]}"
do
    if [ ! -x "$test_exe" ]; then
        echo "test: $test_exe not found or not an executable"
        exit 254
    fi
done

# START the server (in the background)
# $mpi_cmd and $extra_cmd are intentionally unquoted: they may hold several words.
echo "$mpi_cmd -n $n_servers $extra_cmd ./pdc_server.exe &"
$mpi_cmd -n $n_servers $extra_cmd ./pdc_server.exe &

# WAIT a bit, for 1 second
sleep 1

# RUN the actual test(s), remembering the first non-zero exit status
ret=0
for test_exe in "${all_test[@]}"
do
    echo "testing: $test_exe"
    # Reset per test so arguments never leak from one executable to the next.
    test_args=""
    if [[ "$test_exe" = *vpicio || "$test_exe" = *bdcats ]]; then
        test_args="1024"
    fi
    echo "$mpi_cmd -n $n_client $extra_cmd $test_exe $test_args"
    $mpi_cmd -n $n_client $extra_cmd $test_exe $test_args
    test_status="$?"
    # Capture the status immediately; a later passing test must not hide it.
    if [ "$test_status" -ne 0 ] && [ "$ret" -eq 0 ]; then
        ret="$test_status"
    fi
done

# Shutdown the SERVER before exiting, then report the recorded test status
echo "Close server"
echo "$mpi_cmd -n 1 $extra_cmd ./close_server"
$mpi_cmd -n 1 $extra_cmd ./close_server
exit $ret
2 changes: 1 addition & 1 deletion src/tests/vpicio.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ main(int argc, char **argv)
printf("Writing %" PRIu64 " number of particles with %d clients.\n", numparticles, size);
}

dims[0] = numparticles;
dims[0] = numparticles * size;
jeanbez marked this conversation as resolved.
Show resolved Hide resolved

x = (float *)malloc(numparticles * sizeof(float));
y = (float *)malloc(numparticles * sizeof(float));
Expand Down
Loading