Skip to content

Commit

Permalink
Apply clang-format
Browse files Browse the repository at this point in the history
  • Loading branch information
janciesko committed Mar 18, 2024
1 parent 374b1ef commit 9ab1a9e
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 68 deletions.
27 changes: 14 additions & 13 deletions benchmarks/access_overhead/access_overhead_p2p.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@
#include <type_traits>
#include <string>

//#define ACCESS_LDC_USE_MULTI_LDC
#define ACCESS_LDC_USE_MULTI_LDC_BUILTIN
#define NUM_TEAMS 1 /* Recursive subview support needed. TBD */
#define ACCESS_LDC_USE_MULTI_LDC
//#define ACCESS_LDC_USE_MULTI_LDC_BUILTIN

#define CHECK_FOR_CORRECTNESS
//#define CHECK_FOR_CORRECTNESS

using RemoteSpace_t = Kokkos::Experimental::DefaultRemoteMemorySpace;
using RemoteView_t = Kokkos::View<double *, RemoteSpace_t>;
Expand Down Expand Up @@ -486,11 +487,11 @@ struct Access_LDC<
Kokkos::pair(remote_range.first + start_offset,
remote_range.first + start_offset + team_block);

printf("[%lu, %lu], [%lu, %lu], [%lu, %lu], [%lu, %lu]\n",
team_remote_range.first, team_remote_range.second,
team_local_range.first, team_local_range.second,
remote_range.first, remote_range.second, local_range.first,
local_range.second);
// printf("[%lu, %lu], [%lu, %lu], [%lu, %lu], [%lu, %lu]\n",
// team_remote_range.first, team_remote_range.second,
// team_local_range.first, team_local_range.second,
// remote_range.first, remote_range.second, local_range.first,
// local_range.second);

// Construct team subviews
auto v_subview_remote = Kokkos::subview(v, team_remote_range);
Expand Down Expand Up @@ -617,8 +618,8 @@ struct Access_LDC<
time_a = timer.seconds();
#if defined(ACCESS_LDC_USE_MULTI_LDC) || \
defined(ACCESS_LDC_USE_MULTI_LDC_BUILTIN)
Kokkos::parallel_for("block_transfer", team_policy_get_update_t(4, 1),
*this);
Kokkos::parallel_for("block_transfer",
team_policy_get_update_t(NUM_TEAMS, 1), *this);
#else
Kokkos::parallel_for("block_transfer", team_policy_get_update_t(1, 1),
*this);
Expand Down Expand Up @@ -649,10 +650,10 @@ struct Access_LDC<
#if defined(ACCESS_LDC_USE_MULTI_LDC) || \
defined(ACCESS_LDC_USE_MULTI_LDC_BUILTIN)
Kokkos::parallel_for("block_transfer",
team_policy_put_update_t(64, 1), *this);
team_policy_put_update_t(NUM_TEAMS, 1), *this);
#else
Kokkos::parallel_for("block_transfer", team_policy_put_update_t(1, 1),
*this);
Kokkos::parallel_for("block_transfer",
team_policy_put_update_t(NUM_TEAMS, 1), *this);
#endif
Kokkos::fence();
#if defined(KOKKOS_REMOTE_SPACES_ENABLE_DEBUG) && (0)
Expand Down
56 changes: 28 additions & 28 deletions benchmarks/access_overhead/scripts/run_over_size_p2p.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#/bin/bash
BENCHMARK=$1
HOST=$2
DEFAULT_SIZE=100
DEFAULT_SIZE=1000

#exports
export OMP_PROC_BIND=spread
export OMP_PLACES=threads
export OMP_NUM_THREADS=32

ITERS=1
ITERS=30

HASH=`date|md5sum|head -c 5`
DEVS="0,1"
Expand All @@ -21,36 +21,36 @@ LD_LIBRARY_PATH=/projects/ppc64le-pwr9-rhel8/tpls/cuda/12.0.0/gcc/12.2.0/base/ra

#Kokkos Remote Spaces + LDC
let SIZE=$DEFAULT_SIZE
for S in $(seq 1 1); do
for reps in $(seq 1 1); do
for S in $(seq 1 20); do
for reps in $(seq 1 3); do
CUDA_VISIBLE_DEVICES=$DEVS mpirun -np 2 $VARS0 $VARS1 $VARS2 -host $HOST ./$BENCHMARK -N $SIZE -I $ITERS -M 3 | tee -a $FILENAME
done
let SIZE=$SIZE*2
done

# #Kokkos Remote Spaces
# let SIZE=$DEFAULT_SIZE
# for S in $(seq 1 20); do
# for reps in $(seq 1 3); do
# CUDA_VISIBLE_DEVICES=$DEVS mpirun -np 2 $VARS0 $VARS1 $VARS2 -host $HOST ./$BENCHMARK -N $SIZE -I $ITERS -M 2 | tee -a $FILENAME
# done
# let SIZE=$SIZE*2
# done
#Kokkos Remote Spaces
let SIZE=$DEFAULT_SIZE
for S in $(seq 1 20); do
for reps in $(seq 1 3); do
CUDA_VISIBLE_DEVICES=$DEVS mpirun -np 2 $VARS0 $VARS1 $VARS2 -host $HOST ./$BENCHMARK -N $SIZE -I $ITERS -M 2 | tee -a $FILENAME
done
let SIZE=$SIZE*2
done

# #CUDA-aware MPI + Kokkos
# let SIZE=$DEFAULT_SIZE
# for S in $(seq 1 20); do
# for reps in $(seq 1 3); do
# CUDA_VISIBLE_DEVICES=$DEVS mpirun -np 2 $VARS0 $VARS1 $VARS2 -host $HOST ./$BENCHMARK -N $SIZE -I $ITERS -M 1 | tee -a $FILENAME
# done
# let SIZE=$SIZE*2
# done
#CUDA-aware MPI + Kokkos
let SIZE=$DEFAULT_SIZE
for S in $(seq 1 20); do
for reps in $(seq 1 3); do
CUDA_VISIBLE_DEVICES=$DEVS mpirun -np 2 $VARS0 $VARS1 $VARS2 -host $HOST ./$BENCHMARK -N $SIZE -I $ITERS -M 1 | tee -a $FILENAME
done
let SIZE=$SIZE*2
done

# #MPI + Kokkos
# let SIZE=$DEFAULT_SIZE
# for S in $(seq 1 20); do
# for reps in $(seq 1 3); do
# CUDA_VISIBLE_DEVICES=$DEVS mpirun -np 2 $VARS0 $VARS1 $VARS2 -host $HOST ./$BENCHMARK -N $SIZE -I $ITERS -M 0 | tee -a $FILENAME
# done
# let SIZE=$SIZE*2
# done
#MPI + Kokkos
let SIZE=$DEFAULT_SIZE
for S in $(seq 1 20); do
for reps in $(seq 1 3); do
CUDA_VISIBLE_DEVICES=$DEVS mpirun -np 2 $VARS0 $VARS1 $VARS2 -host $HOST ./$BENCHMARK -N $SIZE -I $ITERS -M 0 | tee -a $FILENAME
done
let SIZE=$SIZE*2
done
3 changes: 0 additions & 3 deletions src/core/Kokkos_RemoteSpaces_LocalDeepCopy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,6 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(
auto src_subview_ptr = Kokkos::Impl::get_view_adr(src);
auto dst_subview_ptr = Kokkos::Impl::get_view_adr(dst);

printf("LDC: %p, %p, %p %p\n", dst.data(), src.data(), dst_subview_ptr,
src_subview_ptr);

if (src_rank != my_rank) {
#ifdef KRS_ENABLE_MPISPACE
src_data_block_t data_block = src_data_block_t(
Expand Down
21 changes: 11 additions & 10 deletions src/core/Kokkos_RemoteSpaces_ViewMapping.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,19 +334,19 @@ class ViewMapping<

const SubviewExtents<SrcTraits::rank, rank> extents(src.m_offset.m_dim,
args...);
dst.m_offset = dst_offset_type(src.m_offset, extents);
dst.remote_view_props = src.remote_view_props;
dst.m_offset = dst_offset_type(src.m_offset, extents);
dst.remote_view_props = src.remote_view_props;
bool switch_to_local_indexing = false;

/* We currently support only subviews of subviews where the first subview is
created with a scalar over the leading dimension. */
/*Subviews that span across multiple nodes cannot have subviews in this version
*/
/*Subviews that span across multiple nodes cannot have subviews in this
* version
*/
if (!src.remote_view_props.using_local_indexing) {
dst.remote_view_props.using_local_indexing = !R0 ? true : false;
dst.remote_view_props.R0_offset = extents.domain_offset(0);
}
else
dst.remote_view_props.R0_offset = extents.domain_offset(0);
} else
switch_to_local_indexing = true;

typename view_type::size_type offset;
Expand All @@ -362,7 +362,7 @@ class ViewMapping<
extents.domain_offset(3), extents.domain_offset(4),
extents.domain_offset(5), extents.domain_offset(6),
extents.domain_offset(7));

#ifdef KRS_ENABLE_MPISPACE
// Subviews propagate MPI_Window of the original view
dst.m_handle = ViewDataHandle<DstTraits>::assign(
Expand Down Expand Up @@ -416,7 +416,7 @@ class ViewMapping<Traits, Kokkos::Experimental::RemoteSpaceSpecializeTag> {
template <typename T = Traits>
KOKKOS_INLINE_FUNCTION int get_logical_PE(ENABLE_IF_GLOBAL_LAYOUT) const {
// If View is subview, compute owning PE of index R0_offset
if (USING_LOCAL_INDEXING)
if (USING_GLOBAL_INDEXING && remote_view_props.R0_offset != 0)
return compute_dim0_offsets(remote_view_props.R0_offset).PE;
// Else, return my_PE
return remote_view_props.my_PE;
Expand All @@ -425,7 +425,8 @@ class ViewMapping<Traits, Kokkos::Experimental::RemoteSpaceSpecializeTag> {
template <typename T = Traits>
KOKKOS_INLINE_FUNCTION int get_logical_PE(
ENABLE_IF_PARTITIONED_LAYOUT) const {
if (USING_LOCAL_INDEXING) return remote_view_props.R0_offset;
if (USING_GLOBAL_INDEXING && remote_view_props.R0_offset != 0)
return remote_view_props.R0_offset;
return remote_view_props.my_PE;
}

Expand Down
29 changes: 15 additions & 14 deletions unit_tests/Test_Subview.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ void test_subview3D(int i1, int i2, int i3) {
for (int k = 0; k < v_h.extent(2); ++k) ASSERT_EQ(v_h(i, j, k), 2);
}


template <class Data_t>
void test_subview3D_byRank(int i1, int i2, int i3) {
int my_rank;
Expand All @@ -189,8 +188,10 @@ void test_subview3D_byRank(int i1, int i2, int i3) {
Kokkos::Experimental::get_range(i1, (my_rank + 1) % num_ranks);

// Set to next rank
auto v_sub_1 = Kokkos::subview(v, remote_range.first, Kokkos::ALL, Kokkos::ALL);
//auto v_sub_2 = ViewRemote_2D_t(v, remote_range.first, Kokkos::ALL, Kokkos::ALL);
auto v_sub_1 =
Kokkos::subview(v, remote_range.first, Kokkos::ALL, Kokkos::ALL);
// auto v_sub_2 = ViewRemote_2D_t(v, remote_range.first, Kokkos::ALL,
// Kokkos::ALL);

// Init
for (int i = 0; i < v_h.extent(0); ++i)
Expand All @@ -204,7 +205,7 @@ void test_subview3D_byRank(int i1, int i2, int i3) {
"Increment", v_sub_1.extent(0), KOKKOS_LAMBDA(const int i) {
for (int j = 0; j < v_sub_1.extent(1); ++j) {
v_sub_1(i, j)++;
// v_sub_2(i, j)++;
// v_sub_2(i, j)++;
}
});

Expand All @@ -230,7 +231,6 @@ void test_subviewOfSubview_Scalar_3D(int i1, int i2, int i3) {
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);


using ViewRemote_3D_t = Kokkos::View<Data_t ***, RemoteSpace_t>;
using ViewRemote_2D_t = Kokkos::View<Data_t **, RemoteSpace_t>;
using ViewHost_3D_t = typename ViewRemote_3D_t::HostMirror;
Expand All @@ -242,8 +242,10 @@ void test_subviewOfSubview_Scalar_3D(int i1, int i2, int i3) {
Kokkos::Experimental::get_range(i1, (my_rank + 1) % num_ranks);

// Set to next rank
auto v_sub_1 = Kokkos::subview(v, remote_range.first, Kokkos::ALL, Kokkos::ALL);
auto v_sub_2 = ViewRemote_2D_t(v, remote_range.first, Kokkos::ALL, Kokkos::ALL);
auto v_sub_1 =
Kokkos::subview(v, remote_range.first, Kokkos::ALL, Kokkos::ALL);
auto v_sub_2 =
ViewRemote_2D_t(v, remote_range.first, Kokkos::ALL, Kokkos::ALL);

int i2_half = static_cast<int>(i2 * 0.5);

Expand All @@ -263,10 +265,10 @@ void test_subviewOfSubview_Scalar_3D(int i1, int i2, int i3) {

Kokkos::parallel_for(
"Increment", v_sub_1_half.extent(0), KOKKOS_LAMBDA(const int j) {
for (int k = 0; k < v_sub_1_half.extent(1); ++k){
v_sub_1_half(j, k)++;
v_sub_2_half(j, k)++;
}
for (int k = 0; k < v_sub_1_half.extent(1); ++k) {
v_sub_1_half(j, k)++;
v_sub_2_half(j, k)++;
}
});

Kokkos::fence();
Expand All @@ -275,7 +277,7 @@ void test_subviewOfSubview_Scalar_3D(int i1, int i2, int i3) {

for (int i = 0; i < v_h.extent(0); ++i)
for (int j = 0; j < v_h.extent(1); ++j)
if(j < i2_half)
if (j < i2_half)
for (int k = 0; k < v_h.extent(2); ++k) ASSERT_EQ(v_h(i, j, k), 0);
else
for (int k = 0; k < v_h.extent(2); ++k) ASSERT_EQ(v_h(i, j, k), 2);
Expand Down Expand Up @@ -336,13 +338,12 @@ void test_subviewOfSubview_Range_3D(int i1, int i2, int i3) {

for (int i = 0; i < v_h.extent(0); ++i)
for (int j = 0; j < v_h.extent(1); ++j)
if(j < i2_half)
if (j < i2_half)
for (int k = 0; k < v_h.extent(2); ++k) ASSERT_EQ(v_h(i, j, k), 0);
else
for (int k = 0; k < v_h.extent(2); ++k) ASSERT_EQ(v_h(i, j, k), 2);
}


template <class Data_t>
void test_subview3D_DCCopiesSubviewAccess(int i1, int i2, int i3) {
int my_rank;
Expand Down

0 comments on commit 9ab1a9e

Please sign in to comment.