From 2988db4aa10150163bf7a7bdfb254e7212fe5896 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 2 May 2022 15:56:48 -0400 Subject: [PATCH 01/30] blaspp seems to always define empty BLA_VENDOR in CACHE so only look at its value, not its presence --- cmake/modules/FindOrFetchBTAS.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/modules/FindOrFetchBTAS.cmake b/cmake/modules/FindOrFetchBTAS.cmake index 775d1964b6..c484905f9d 100644 --- a/cmake/modules/FindOrFetchBTAS.cmake +++ b/cmake/modules/FindOrFetchBTAS.cmake @@ -13,9 +13,9 @@ if (NOT TARGET BTAS::BTAS) # BTAS will load BLAS++/LAPACK++ ... if those use CMake's FindBLAS/FindLAPACK (as indicated by defined BLA_VENDOR) # will need to specify Fortran linkage convention ... manually for now, switching to NWX's linear algebra discovery # is necessary to handle all the corner cases for automatic discovery - if (DEFINED BLA_VENDOR) + if (BLA_VENDOR) set(_linalgpp_use_standard_linalg_kits TRUE) - endif(DEFINED BLA_VENDOR) + endif(BLA_VENDOR) if (NOT TILEDARRAY_HAS_CUDA) # tell BLAS++/LAPACK++ to ignore CUDA From 0afdbfe5267d277498f94946241e144612c9cb5f Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 2 May 2022 16:35:39 -0400 Subject: [PATCH 02/30] update unit tests to replace DistArray::range() with DistArray::tiles_range() per 8cf3327ef1489847e65b0fd7d01c8eb3e773ea54 --- src/TiledArray/expressions/blk_tsr_expr.h | 2 +- tests/dist_array.cpp | 46 +++++++-------- tests/dist_eval_binary_eval.cpp | 2 +- tests/dist_eval_contraction_eval.cpp | 4 +- tests/eigen.cpp | 40 ++++++------- tests/expressions_cuda_um.cpp | 69 +++++++++++++++-------- tests/expressions_impl.h | 68 ++++++++++++++-------- tests/tot_array_fixture.h | 2 +- tests/tot_dist_array_part2.cpp | 34 +++++------ tests/tot_expressions.cpp | 10 ++-- 10 files changed, 161 insertions(+), 116 deletions(-) diff --git a/src/TiledArray/expressions/blk_tsr_expr.h b/src/TiledArray/expressions/blk_tsr_expr.h index 5604d71d63..00b19d453e 100644 --- a/src/TiledArray/expressions/blk_tsr_expr.h +++ b/src/TiledArray/expressions/blk_tsr_expr.h @@ -234,7 +234,7 @@ class BlkTsrExprBase : public Expr { BlkTsrExprBase(reference array, const std::string& annotation, const PairRange& bounds) : Expr_(), array_(array), annotation_(annotation) { - const auto rank = array.range().rank(); + const auto rank = array.tiles_range().rank(); lower_bound_.reserve(rank); upper_bound_.reserve(rank); int d = 0; diff --git a/tests/dist_array.cpp b/tests/dist_array.cpp index 7005ac60d0..d65bf73f86 100644 --- a/tests/dist_array.cpp +++ b/tests/dist_array.cpp @@ -36,8 +36,8 @@ ArrayFixture::ArrayFixture() : shape_tensor(tr.tiles_range(), 0.0), world(*GlobalFixture::world), a(world, tr) { - for (ArrayN::range_type::const_iterator it = a.range().begin(); - it != a.range().end(); ++it) + for (ArrayN::range_type::const_iterator it = a.tiles_range().begin(); + it != a.tiles_range().end(); ++it) if (a.is_local(*it)) a.set(*it, world.rank() + 1); // Fill the tile at *it (the index) @@ -46,8 +46,8 @@ ArrayFixture::ArrayFixture() } b = decltype(b)(world, tr, TiledArray::SparseShape(shape_tensor, tr)); - for (SpArrayN::range_type::const_iterator it = b.range().begin(); - it != b.range().end(); ++it) + for (SpArrayN::range_type::const_iterator it = b.tiles_range().begin(); + it != b.tiles_range().end(); ++it) if (!b.is_zero(*it) && b.is_local(*it)) b.set(*it, world.rank() + 1); // Fill the tile at *it (the index) @@ -305,8 +305,8 @@ BOOST_AUTO_TEST_CASE(owner) { std::default_delete()); ordinal_type o = 0; - for (ArrayN::range_type::const_iterator it = a.range().begin(); - it != a.range().end(); ++it, ++o) { + for (ArrayN::range_type::const_iterator it = a.tiles_range().begin(); + it != a.tiles_range().end(); ++it, ++o) { // Check that local ownership agrees const int owner = a.owner(*it); BOOST_CHECK_EQUAL(a.owner(o), owner); @@ -334,8 +334,8 @@ BOOST_AUTO_TEST_CASE(is_local) { // Test to make sure everyone agrees who owns which tiles. ordinal_type o = 0; - for (ArrayN::range_type::const_iterator it = a.range().begin(); - it != a.range().end(); ++it, ++o) { + for (ArrayN::range_type::const_iterator it = a.tiles_range().begin(); + it != a.tiles_range().end(); ++it, ++o) { // Check that local ownership agrees const bool local_tile = a.owner(o) == world.rank(); BOOST_CHECK_EQUAL(a.is_local(*it), local_tile); @@ -352,8 +352,8 @@ BOOST_AUTO_TEST_CASE(is_local) { } BOOST_AUTO_TEST_CASE(find_local) { - for (ArrayN::range_type::const_iterator it = a.range().begin(); - it != a.range().end(); ++it) { + for (ArrayN::range_type::const_iterator it = a.tiles_range().begin(); + it != a.tiles_range().end(); ++it) { if (a.is_local(*it)) { Future tile = a.find(*it); @@ -366,7 +366,7 @@ BOOST_AUTO_TEST_CASE(find_local) { } } - for (auto&& tile_idx : a.range()) { + for (auto&& tile_idx : a.tiles_range()) { if (a.is_local(tile_idx)) { const Future& const_tile_fut = a.find_local(tile_idx); Future& nonconst_tile_fut = a.find_local(tile_idx); @@ -393,8 +393,8 @@ BOOST_AUTO_TEST_CASE(find_local) { } BOOST_AUTO_TEST_CASE(find_remote) { - for (ArrayN::range_type::const_iterator it = a.range().begin(); - it != a.range().end(); ++it) { + for (ArrayN::range_type::const_iterator it = a.tiles_range().begin(); + it != a.tiles_range().end(); ++it) { if (!a.is_local(*it)) { Future tile = a.find(*it); @@ -409,8 +409,8 @@ BOOST_AUTO_TEST_CASE(find_remote) { BOOST_AUTO_TEST_CASE(fill_tiles) { ArrayN a(world, tr); - for (ArrayN::range_type::const_iterator it = a.range().begin(); - it != a.range().end(); ++it) { + for (ArrayN::range_type::const_iterator it = a.tiles_range().begin(); + it != a.tiles_range().end(); ++it) { if (a.is_local(*it)) { a.set(*it, 0); // Fill the tile at *it (the index) with 0 @@ -430,8 +430,8 @@ BOOST_AUTO_TEST_CASE(assign_tiles) { std::vector data; ArrayN a(world, tr); - for (ArrayN::range_type::const_iterator it = a.range().begin(); - it != a.range().end(); ++it) { + for (ArrayN::range_type::const_iterator it = a.tiles_range().begin(); + it != a.tiles_range().end(); ++it) { ArrayN::trange_type::range_type range = a.trange().make_tile_range(*it); if (a.is_local(*it)) { if (data.size() < range.volume()) data.resize(range.volume(), 1); @@ -500,8 +500,8 @@ BOOST_AUTO_TEST_CASE(truncate) { BOOST_CHECK_NO_THROW(b_trunc0.truncate()); auto b_trunc1 = b.clone(); BOOST_CHECK_NO_THROW( - b_trunc1.truncate(std::numeric_limits::max())); + b_trunc1.truncate(std::numeric_limits< + typename decltype(b)::shape_type::value_type>::max())); BOOST_CHECK(std::distance(b_trunc1.begin(), b_trunc1.end()) == 0); } @@ -623,12 +623,12 @@ BOOST_AUTO_TEST_CASE(parallel_serialization) { char archive_file_prefix_name[] = "tmp.XXXXXX"; mktemp(archive_file_prefix_name); madness::archive::ParallelOutputArchive<> oar(world, archive_file_prefix_name, - nio); + nio); oar& a; oar.close(); madness::archive::ParallelInputArchive<> iar(world, archive_file_prefix_name, - nio); + nio); decltype(a) aread; aread.load(world, iar); @@ -647,12 +647,12 @@ BOOST_AUTO_TEST_CASE(parallel_sparse_serialization) { char archive_file_prefix_name[] = "tmp.XXXXXX"; mktemp(archive_file_prefix_name); madness::archive::ParallelOutputArchive<> oar(world, archive_file_prefix_name, - nio); + nio); oar& b; oar.close(); madness::archive::ParallelInputArchive<> iar(world, archive_file_prefix_name, - nio); + nio); decltype(b) bread; bread.load(world, iar); diff --git a/tests/dist_eval_binary_eval.cpp b/tests/dist_eval_binary_eval.cpp index a79ac1ef31..5e8368fd81 100644 --- a/tests/dist_eval_binary_eval.cpp +++ b/tests/dist_eval_binary_eval.cpp @@ -215,7 +215,7 @@ BOOST_AUTO_TEST_CASE(perm_eval) { for (auto index : *dist_eval.pmap()) { // Get the original tiles const std::size_t arg_index = - left.range().ordinal(inv_perm * dist_eval.range().idx(index)); + left.tiles_range().ordinal(inv_perm * dist_eval.range().idx(index)); const TArrayI::value_type left_tile = left.find(arg_index); const TArrayI::value_type right_tile = right.find(arg_index); diff --git a/tests/dist_eval_contraction_eval.cpp b/tests/dist_eval_contraction_eval.cpp index 1c31328359..6e59e2f93b 100644 --- a/tests/dist_eval_contraction_eval.cpp +++ b/tests/dist_eval_contraction_eval.cpp @@ -115,9 +115,9 @@ struct ContractionEvalFixture : public SparseShapeFixture { const int middle) { // Compute the number of rows and columns in the matrix, and a new weight // that is bisected the row and column dimensions. - std::vector weight(array.range().rank(), 0ul); + std::vector weight(array.tiles_range().rank(), 0ul); std::size_t MN[2] = {1ul, 1ul}; - const int dim = array.range().rank(); + const int dim = array.tiles_range().rank(); int i = dim - 1; for (; i >= middle; --i) { weight[i] = MN[1]; diff --git a/tests/eigen.cpp b/tests/eigen.cpp index 6196591713..bfa4f1a0db 100644 --- a/tests/eigen.cpp +++ b/tests/eigen.cpp @@ -172,8 +172,8 @@ BOOST_AUTO_TEST_CASE(matrix_to_array) { (array = eigen_to_array(*GlobalFixture::world, trange, matrix))); // Check that the data in array is equal to that in matrix - for (Range::const_iterator it = array.range().begin(); - it != array.range().end(); ++it) { + for (Range::const_iterator it = array.tiles_range().begin(); + it != array.tiles_range().end(); ++it) { Future tile = array.find(*it); for (Range::const_iterator tile_it = tile.get().range().begin(); tile_it != tile.get().range().end(); ++tile_it) { @@ -193,8 +193,8 @@ BOOST_AUTO_TEST_CASE(vector_to_array) { trange1, vector))); // Check that the data in array matches the data in vector - for (Range::const_iterator it = array1.range().begin(); - it != array1.range().end(); ++it) { + for (Range::const_iterator it = array1.tiles_range().begin(); + it != array1.tiles_range().end(); ++it) { Future tile = array1.find(*it); for (Range::const_iterator tile_it = tile.get().range().begin(); tile_it != tile.get().range().end(); ++tile_it) { @@ -211,8 +211,8 @@ BOOST_AUTO_TEST_CASE(array_to_matrix) { if (GlobalFixture::world->size() == 1) { // Fill the array with random data GlobalFixture::world->srand(27); - for (Range::const_iterator it = array.range().begin(); - it != array.range().end(); ++it) { + for (Range::const_iterator it = array.tiles_range().begin(); + it != array.tiles_range().end(); ++it) { TArrayI::value_type tile(array.trange().make_tile_range(*it)); for (TArrayI::value_type::iterator tile_it = tile.begin(); tile_it != tile.end(); ++tile_it) { @@ -235,8 +235,8 @@ BOOST_AUTO_TEST_CASE(array_to_matrix) { array.trange().elements_range().extent(1)); // Check that the data in matrix matches the data in array - for (Range::const_iterator it = array.range().begin(); - it != array.range().end(); ++it) { + for (Range::const_iterator it = array.tiles_range().begin(); + it != array.tiles_range().end(); ++it) { Future tile = array.find(*it); for (Range::const_iterator tile_it = tile.get().range().begin(); tile_it != tile.get().range().end(); ++tile_it) { @@ -281,8 +281,8 @@ BOOST_AUTO_TEST_CASE(array_to_matrix) { array.trange().elements_range().extent(1)); // Check that the data in vector matches the data in array - for (Range::const_iterator it = array.range().begin(); - it != array.range().end(); ++it) { + for (Range::const_iterator it = array.tiles_range().begin(); + it != array.tiles_range().end(); ++it) { BOOST_CHECK(array.is_local(*it)); Future tile = array.find(*it); @@ -301,8 +301,8 @@ BOOST_AUTO_TEST_CASE(array_to_vector) { if (GlobalFixture::world->size() == 1) { // Fill the array with random data GlobalFixture::world->srand(27); - for (Range::const_iterator it = array1.range().begin(); - it != array1.range().end(); ++it) { + for (Range::const_iterator it = array1.tiles_range().begin(); + it != array1.tiles_range().end(); ++it) { TArrayI::value_type tile(array1.trange().make_tile_range(*it)); for (TArrayI::value_type::iterator tile_it = tile.begin(); tile_it != tile.end(); ++tile_it) { @@ -320,8 +320,8 @@ BOOST_AUTO_TEST_CASE(array_to_vector) { BOOST_CHECK_EQUAL(vector.cols(), 1); // Check that the data in vector matches the data in array - for (Range::const_iterator it = array1.range().begin(); - it != array1.range().end(); ++it) { + for (Range::const_iterator it = array1.tiles_range().begin(); + it != array1.tiles_range().end(); ++it) { Future tile = array1.find(*it); for (Range::const_iterator tile_it = tile.get().range().begin(); tile_it != tile.get().range().end(); ++tile_it) { @@ -359,8 +359,8 @@ BOOST_AUTO_TEST_CASE(array_to_vector) { BOOST_CHECK_EQUAL(vector.cols(), 1); // Check that the data in vector matches the data in array - for (Range::const_iterator it = array1.range().begin(); - it != array1.range().end(); ++it) { + for (Range::const_iterator it = array1.tiles_range().begin(); + it != array1.tiles_range().end(); ++it) { BOOST_CHECK(array1.is_local(*it)); Future tile = array1.find(*it); @@ -430,8 +430,8 @@ BOOST_AUTO_TEST_CASE(tensor_to_array) { *GlobalFixture::world, trangeN, tensor))); // Check that the data in array is equal to that in matrix - for (Range::const_iterator it = array.range().begin(); - it != array.range().end(); ++it) { + for (Range::const_iterator it = array.tiles_range().begin(); + it != array.tiles_range().end(); ++it) { Future tile = array.find(*it); for (Range::const_iterator tile_it = tile.get().range().begin(); tile_it != tile.get().range().end(); ++tile_it) { @@ -497,8 +497,8 @@ BOOST_AUTO_TEST_CASE(array_to_tensor) { arrayN.trange().elements_range().extent().end()); // Check that the data in vector matches the data in array - for (Range::const_iterator it = arrayN.range().begin(); - it != arrayN.range().end(); ++it) { + for (Range::const_iterator it = arrayN.tiles_range().begin(); + it != arrayN.tiles_range().end(); ++it) { BOOST_CHECK(arrayN.is_local(*it)); Future tile = arrayN.find(*it); diff --git a/tests/expressions_cuda_um.cpp b/tests/expressions_cuda_um.cpp index b03ec0c994..15cdf2146d 100644 --- a/tests/expressions_cuda_um.cpp +++ b/tests/expressions_cuda_um.cpp @@ -305,7 +305,8 @@ BOOST_AUTO_TEST_CASE(permute) { BOOST_REQUIRE_NO_THROW(a("a,b,c") = b("c,b,a")); for (std::size_t i = 0ul; i < b.size(); ++i) { - const std::size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const std::size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (a.is_local(perm_index)) { TArrayUMD::value_type a_tile = a.find(perm_index).get(); TArrayUMD::value_type perm_b_tile = permute_fn(b.find(i), perm); @@ -333,7 +334,8 @@ BOOST_AUTO_TEST_CASE(permute) { BOOST_REQUIRE_NO_THROW(a("a,b,c") = b("b,c,a")); for (std::size_t i = 0ul; i < b.size(); ++i) { - const std::size_t perm_index = a.range().ordinal(perm2 * b.range().idx(i)); + const std::size_t perm_index = + a.tiles_range().ordinal(perm2 * b.tiles_range().idx(i)); if (a.is_local(perm_index)) { TArrayUMD::value_type a_tile = a.find(perm_index).get(); TArrayUMD::value_type perm_b_tile = permute_fn(b.find(i), perm2); @@ -350,7 +352,8 @@ BOOST_AUTO_TEST_CASE(scale_permute) { BOOST_REQUIRE_NO_THROW(a("a,b,c") = 2 * b("c,b,a")); for (std::size_t i = 0ul; i < b.size(); ++i) { - const std::size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const std::size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (a.is_local(perm_index)) { TArrayUMD::value_type a_tile = a.find(perm_index).get(); TArrayUMD::value_type perm_b_tile = permute_fn(b.find(i), perm); @@ -524,7 +527,8 @@ BOOST_AUTO_TEST_CASE(permute_block) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = a("c,b,a").block({3, 3, 3}, {5, 5, 5})); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index))) { auto arg_tile = permute_fn(a.find(block_range.ordinal(perm_index)), perm); @@ -543,7 +547,8 @@ BOOST_AUTO_TEST_CASE(permute_block) { 2 * a("c,b,a").block({3, 3, 3}, {5, 5, 5})); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index))) { auto arg_tile = permute_fn(a.find(block_range.ordinal(perm_index)), perm); @@ -563,7 +568,8 @@ BOOST_AUTO_TEST_CASE(permute_block) { 4 * b("a,b,c").block({3, 3, 3}, {5, 5, 5}))); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index)) || !b.is_zero(block_range.ordinal(index))) { @@ -584,7 +590,8 @@ BOOST_AUTO_TEST_CASE(permute_block) { 4 * b("c,b,a").block({3, 3, 3}, {5, 5, 5}))); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index)) || !b.is_zero(block_range.ordinal(perm_index))) { @@ -867,7 +874,8 @@ BOOST_AUTO_TEST_CASE(add_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -879,7 +887,8 @@ BOOST_AUTO_TEST_CASE(add_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -958,7 +967,8 @@ BOOST_AUTO_TEST_CASE(scale_add_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -970,7 +980,8 @@ BOOST_AUTO_TEST_CASE(scale_add_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -1058,7 +1069,8 @@ BOOST_AUTO_TEST_CASE(subt_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -1070,7 +1082,8 @@ BOOST_AUTO_TEST_CASE(subt_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -1133,7 +1146,8 @@ BOOST_AUTO_TEST_CASE(scale_subt_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -1145,7 +1159,8 @@ BOOST_AUTO_TEST_CASE(scale_subt_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -1233,7 +1248,8 @@ BOOST_AUTO_TEST_CASE(mult_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -1245,7 +1261,8 @@ BOOST_AUTO_TEST_CASE(mult_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -1308,7 +1325,8 @@ BOOST_AUTO_TEST_CASE(scale_mult_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -1320,7 +1338,8 @@ BOOST_AUTO_TEST_CASE(scale_mult_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -2459,7 +2478,8 @@ BOOST_AUTO_TEST_CASE(dot_permute) { double expected = 0; for (std::size_t i = 0ul; i < a.size(); ++i) { TArrayUMD::value_type a_tile = a.find(i).get(); - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); for (std::size_t j = 0ul; j < a_tile.size(); ++j) @@ -2476,7 +2496,8 @@ BOOST_AUTO_TEST_CASE(dot_permute) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) && !b.is_zero(perm_index)) { auto a_tile = a.find(i).get(); auto b_tile = perm * b.find(perm_index).get(); @@ -2495,7 +2516,8 @@ BOOST_AUTO_TEST_CASE(dot_permute) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) && !b.is_zero(perm_index)) { auto a_tile = a.find(i).get(); auto b_tile = perm * b.find(perm_index).get(); @@ -2516,7 +2538,8 @@ BOOST_AUTO_TEST_CASE(dot_permute) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) && !b.is_zero(perm_index)) { auto a_tile = a.find(i).get(); auto b_tile = perm * b.find(perm_index).get(); diff --git a/tests/expressions_impl.h b/tests/expressions_impl.h index 0ffbf4754e..76bb75a06c 100644 --- a/tests/expressions_impl.h +++ b/tests/expressions_impl.h @@ -237,7 +237,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(a("a,b,c") = b("c,b,a")); for (std::size_t i = 0ul; i < b.size(); ++i) { - const std::size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const std::size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (a.is_local(perm_index) && !a.is_zero(perm_index)) { auto a_tile = a.find(perm_index).get(); auto perm_b_tile = perm * b.find(i).get(); @@ -258,7 +259,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(scale_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(a("a,b,c") = 2 * b("c,b,a")); for (std::size_t i = 0ul; i < b.size(); ++i) { - const std::size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const std::size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (a.is_local(perm_index) && !a.is_zero(perm_index)) { auto a_tile = a.find(perm_index).get(); auto perm_b_tile = perm * b.find(i).get(); @@ -501,7 +503,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(permute_block, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = a("c,b,a").block({3, 3, 3}, {5, 5, 5})); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index))) { auto arg_tile = perm * a.find(block_range.ordinal(perm_index)).get(); @@ -520,7 +523,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(permute_block, F, Fixtures, F) { 2 * a("c,b,a").block({3, 3, 3}, {5, 5, 5})); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index))) { auto arg_tile = perm * a.find(block_range.ordinal(perm_index)).get(); @@ -540,7 +544,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(permute_block, F, Fixtures, F) { 4 * b("a,b,c").block({3, 3, 3}, {5, 5, 5}))); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index)) || !b.is_zero(block_range.ordinal(index))) { @@ -565,7 +570,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(permute_block, F, Fixtures, F) { 4 * b("c,b,a").block({3, 3, 3}, {5, 5, 5}))); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index)) || !b.is_zero(block_range.ordinal(perm_index))) { @@ -681,7 +687,7 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(assign_subblock_permute_block, F, Fixtures, for (std::size_t index = 0ul; index < block_range.volume(); ++index) { // const size_t perm_index = block_range.ordinal(perm * - // c.range().idx(index)); + // c.tiles_range().idx(index)); auto perm_index = perm * block_range.idx(index); if (!a.is_zero(block_range.ordinal(perm_index))) { @@ -961,7 +967,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(add_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = (2 * a("c,b,a")) + (3 * b("a,b,c"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -979,7 +986,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(add_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = (2 * a("c,b,a")) + (3 * b("c,b,a"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1098,7 +1106,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(scale_add_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = 5 * (2 * a("c,b,a")) + (3 * b("a,b,c"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1116,7 +1125,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(scale_add_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = 5 * (2 * a("c,b,a")) + (3 * b("c,b,a"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1258,7 +1268,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(sub_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = (2 * a("c,b,a")) - (3 * b("a,b,c"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1276,7 +1287,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(sub_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = (2 * a("c,b,a")) - (3 * b("c,b,a"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1377,7 +1389,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(scale_sub_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = 5 * (2 * a("c,b,a")) - (3 * b("a,b,c"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1395,7 +1408,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(scale_sub_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = 5 * (2 * a("c,b,a")) - (3 * b("c,b,a"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1495,7 +1509,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(mult_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = (2 * a("c,b,a")) * (3 * b("a,b,c"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1513,7 +1528,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(mult_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = (2 * a("c,b,a")) * (3 * b("c,b,a"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1656,7 +1672,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(scale_mult_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = 5 * (2 * a("c,b,a")) * (3 * b("a,b,c"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -1674,7 +1691,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(scale_mult_permute, F, Fixtures, F) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = 5 * (2 * a("c,b,a")) * (3 * b("c,b,a"))); for (std::size_t i = 0ul; i < c.size(); ++i) { - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); if (!c.is_zero(i)) { auto c_tile = c.find(i).get(); auto a_tile = a.is_zero(perm_index) ? F::make_zero_tile(c_tile.range()) @@ -2794,7 +2812,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(dot_permute, F, Fixtures, F) { // Compute the expected value for the dot function. typename F::element_type expected = 0; for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) && !b.is_zero(perm_index)) { auto a_tile = a.find(i).get(); auto b_tile = perm * b.find(perm_index).get(); @@ -2814,7 +2833,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(dot_permute, F, Fixtures, F) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) || !b.is_zero(perm_index)) { auto a_tile = a.is_zero(i) ? F::make_zero_tile(a.trange().tile(i)) : a.find(i).get(); @@ -2836,7 +2856,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(dot_permute, F, Fixtures, F) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) && !b.is_zero(perm_index)) { auto a_tile = a.find(i).get(); auto b_tile = perm * b.find(perm_index).get(); @@ -2857,7 +2878,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(dot_permute, F, Fixtures, F) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) || !b.is_zero(perm_index)) { auto a_tile = a.is_zero(i) ? F::make_zero_tile(a.trange().tile(i)) : a.find(i).get(); diff --git a/tests/tot_array_fixture.h b/tests/tot_array_fixture.h index 45a0ae0f8e..9d46fadcc7 100644 --- a/tests/tot_array_fixture.h +++ b/tests/tot_array_fixture.h @@ -264,7 +264,7 @@ struct ToTArrayFixture { // Same components? Here we make all ranks check all tiles bool are_same = true; - for (auto idx : lhs.range()) { + for (auto idx : lhs.tiles_range()) { const auto& lhs_tot = lhs.find(idx).get(); const auto& rhs_tot = rhs.find(idx).get(); if (lhs_tot != rhs_tot) { diff --git a/tests/tot_dist_array_part2.cpp b/tests/tot_dist_array_part2.cpp index 9473a5ef6a..b916812884 100644 --- a/tests/tot_dist_array_part2.cpp +++ b/tests/tot_dist_array_part2.cpp @@ -255,17 +255,17 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(trange, TestParam, test_params) { } } -BOOST_AUTO_TEST_CASE_TEMPLATE(range, TestParam, test_params) { +BOOST_AUTO_TEST_CASE_TEMPLATE(tiles_range, TestParam, test_params) { { tensor_type t; if (m_world.nproc() == 1) - BOOST_CHECK_THROW(t.range(), TiledArray::Exception); + BOOST_CHECK_THROW(t.tiles_range(), TiledArray::Exception); } for (auto tr_t : run_all()) { auto& tr = std::get<0>(tr_t); auto& corr = std::get<2>(tr_t); - bool are_same = corr.range() == tr.tiles_range(); + bool are_same = corr.tiles_range() == tr.tiles_range(); BOOST_TEST(are_same); } } @@ -344,7 +344,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(call_operator, TestParam, test_params) { for (auto tr_t : run_all()) { auto inner_rank = std::get<1>(tr_t); auto& t = std::get<2>(tr_t); - auto outer_rank = t.range().rank(); + auto outer_rank = t.tiles_range().rank(); std::string outer_idx = (outer_rank == 1 ? "i" : "i,j"); std::string inner_idx = (inner_rank == 1 ? "k" : "k,l"); @@ -367,7 +367,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(const_call_operator, TestParam, test_params) { for (auto tr_t : run_all()) { auto inner_rank = std::get<1>(tr_t); const auto& t = std::get<2>(tr_t); - auto outer_rank = t.range().rank(); + auto outer_rank = t.tiles_range().rank(); std::string outer_idx = (outer_rank == 1 ? "i" : "i,j"); std::string inner_idx = (inner_rank == 1 ? "k" : "k,l"); @@ -433,8 +433,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(owner, TestParam, test_params) { BOOST_CHECK_THROW(corr.owner(bad_idx), TiledArray::Exception); } - for (auto idx : corr.range()) { - const auto ordinal = corr.range().ordinal(idx); + for (auto idx : corr.tiles_range()) { + const auto ordinal = corr.tiles_range().ordinal(idx); BOOST_TEST(corr.owner(idx) == corr.pmap()->owner(ordinal)); BOOST_TEST(corr.owner(ordinal) == corr.pmap()->owner(ordinal)); } @@ -468,8 +468,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(owner_init_list, TestParam, test_params) { BOOST_CHECK_THROW(corr.owner(il2), except_t); } - for (auto idx : corr.range()) { - const auto ordinal = corr.range().ordinal(idx); + for (auto idx : corr.tiles_range()) { + const auto ordinal = corr.tiles_range().ordinal(idx); const auto owner = corr.pmap()->owner(ordinal); if (rank == 1) { BOOST_TEST(corr.owner({idx[0]}) == owner); @@ -502,8 +502,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(is_local, TestParam, test_params) { BOOST_CHECK_THROW(corr.is_local(bad_idx), TiledArray::Exception); } - for (auto idx : corr.range()) { - const auto ordinal = corr.range().ordinal(idx); + for (auto idx : corr.tiles_range()) { + const auto ordinal = corr.tiles_range().ordinal(idx); BOOST_TEST(corr.is_local(idx) == corr.pmap()->is_local(ordinal)); BOOST_TEST(corr.is_local(ordinal) == corr.pmap()->is_local(ordinal)); } @@ -537,8 +537,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(is_local_init_list, TestParam, test_params) { BOOST_CHECK_THROW(corr.is_local(il2), except_t); } - for (auto idx : corr.range()) { - const auto ordinal = corr.range().ordinal(idx); + for (auto idx : corr.tiles_range()) { + const auto ordinal = corr.tiles_range().ordinal(idx); const auto is_local = corr.pmap()->is_local(ordinal); if (rank == 1) { BOOST_TEST(corr.is_local({idx[0]}) == is_local); @@ -571,8 +571,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(is_zero, TestParam, test_params) { BOOST_CHECK_THROW(corr.is_zero(bad_idx), TiledArray::Exception); } - for (auto idx : corr.range()) { - const auto ordinal = corr.range().ordinal(idx); + for (auto idx : corr.tiles_range()) { + const auto ordinal = corr.tiles_range().ordinal(idx); BOOST_TEST(corr.is_zero(idx) == corr.shape().is_zero(ordinal)); BOOST_TEST(corr.owner(ordinal) == corr.pmap()->owner(ordinal)); } @@ -606,8 +606,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(is_zero_init_list, TestParam, test_params) { BOOST_CHECK_THROW(corr.is_zero(il2), except_t); } - for (auto idx : corr.range()) { - const auto ordinal = corr.range().ordinal(idx); + for (auto idx : corr.tiles_range()) { + const auto ordinal = corr.tiles_range().ordinal(idx); const auto is_zero = corr.shape().is_zero(ordinal); if (rank == 1) { BOOST_TEST(corr.is_zero({idx[0]}) == is_zero); diff --git a/tests/tot_expressions.cpp b/tests/tot_expressions.cpp index 1c7feb11b7..c834810065 100644 --- a/tests/tot_expressions.cpp +++ b/tests/tot_expressions.cpp @@ -27,7 +27,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(no_perm, TestParam, test_params) { auto& in_rank = std::get<1>(tr_t); auto& t = std::get<2>(tr_t); - std::string out_idx = t.range().rank() == 1 ? "i" : "i, j"; + std::string out_idx = t.tiles_range().rank() == 1 ? "i" : "i, j"; std::string in_idx = in_rank == 1 ? "k" : "k, l"; std::string idx = out_idx + ";" + in_idx; @@ -42,7 +42,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(permute_outer, TestParam, test_params) { auto& in_rank = std::get<1>(tr_t); auto& t = std::get<2>(tr_t); - if (t.range().rank() == 1) continue; + if (t.tiles_range().rank() == 1) continue; std::string rhs_out_idx = "i, j"; std::string lhs_out_idx = "j, i"; @@ -52,7 +52,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(permute_outer, TestParam, test_params) { tensor_type result; result(lhs_idx) = t(rhs_idx); - for (auto tile_idx : t.range()) { + for (auto tile_idx : t.tiles_range()) { auto rtile = t.find(tile_idx).get(); auto ltile = result.find({tile_idx[1], tile_idx[0]}).get(); for (auto outer_idx : ltile.range()) { @@ -79,13 +79,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(permute_inner, TestParam, test_params) { std::string rhs_in_idx = "i, j"; std::string lhs_in_idx = "j, i"; - std::string out_idx = t.range().rank() == 1 ? "k" : "k, l"; + std::string out_idx = t.tiles_range().rank() == 1 ? "k" : "k, l"; std::string rhs_idx = out_idx + ";" + rhs_in_idx; std::string lhs_idx = out_idx + ";" + lhs_in_idx; tensor_type result; result(lhs_idx) = t(rhs_idx); - for (auto tile_idx : t.range()) { + for (auto tile_idx : t.tiles_range()) { auto rtile = t.find(tile_idx).get(); auto ltile = result.find(tile_idx).get(); bool same_outer_range = ltile.range() == rtile.range(); From 760bfa3a4a17d31ead985a3106931b3764fc9cbe Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 6 May 2022 08:33:15 -0400 Subject: [PATCH 03/30] ta_test: UM expr tests need looser FP64 comparison tolerance --- tests/expressions_cuda_um.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expressions_cuda_um.cpp b/tests/expressions_cuda_um.cpp index 15cdf2146d..0a9e057f6b 100644 --- a/tests/expressions_cuda_um.cpp +++ b/tests/expressions_cuda_um.cpp @@ -123,7 +123,7 @@ struct UMExpressionsFixture : public TiledRangeFixture { TArrayUMD u; TArrayUMD v; TArrayUMD w; - double tolerance = 1.0e-14; + static constexpr double tolerance = 5.0e-14; }; // UMExpressionsFixture // Instantiate static variables for fixture From 4be43ff22b7a82604243755e744564237316ccd8 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 6 May 2022 08:34:18 -0400 Subject: [PATCH 04/30] introduced check_serial{,-tiledarray} targets --- CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a972c76b4..88020f8fab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -354,14 +354,18 @@ add_subdirectory(doc) ########################## include(CTest) if (BUILD_TESTING) - set(_ctest_args -V -R "tiledarray/unit") + set(_ctest_args -V -R "tiledarray/unit/run-np.*") + set(_ctest_args_serial -V -R "tiledarray/unit/run-np1") if (DEFINED TA_UT_CTEST_TIMEOUT) list(APPEND _ctest_args --timeout ${TA_UT_CTEST_TIMEOUT}) + list(APPEND _ctest_args_serial --timeout ${TA_UT_CTEST_TIMEOUT}) endif(DEFINED TA_UT_CTEST_TIMEOUT) add_custom_target_subproject(tiledarray check USES_TERMINAL COMMAND ${CMAKE_CTEST_COMMAND} ${_ctest_args}) + add_custom_target_subproject(tiledarray check_serial USES_TERMINAL COMMAND ${CMAKE_CTEST_COMMAND} ${_ctest_args_serial}) add_subdirectory(tests) else() add_custom_target_subproject(tiledarray check USES_TERMINAL COMMAND echo "WARNING: unit testing disabled. To enable, give -DBUILD_TESTING=ON to cmake") + add_custom_target_subproject(tiledarray check_serial USES_TERMINAL COMMAND echo "WARNING: unit testing disabled. To enable, give -DBUILD_TESTING=ON to cmake") endif() ########################## From 862c0141723dcf399b1053ab4205e7ae18ed372f Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 6 May 2022 08:35:08 -0400 Subject: [PATCH 05/30] gitlab ci: run serial unit tests with CUDA + qualify all targets with -tiledarray where possible --- .gitlab-ci.yml | 4 ++-- CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 11f03acf70..ce49b4dc01 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,7 +7,7 @@ default: variables: MAD_NUM_THREADS : 2 - TA_TARGETS : "tiledarray examples ta_test check-tiledarray" + TA_TARGETS : "tiledarray examples-tiledarray ta_test check-tiledarray" # Debug builds with ScaLAPACK=ON need increased TA_UT_CTEST_TIMEOUT TA_CONFIG : > CMAKE_BUILD_TYPE=${BUILD_TYPE} @@ -70,4 +70,4 @@ ubuntu: CXX: [ g++ ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] - TA_TARGETS : [ "tiledarray examples" ] + TA_TARGETS : [ "tiledarray examples-tiledarray check_serial-tiledarray" ] diff --git a/CMakeLists.txt b/CMakeLists.txt index 88020f8fab..9a4b96c3e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,7 +355,7 @@ add_subdirectory(doc) include(CTest) if (BUILD_TESTING) set(_ctest_args -V -R "tiledarray/unit/run-np.*") - set(_ctest_args_serial -V -R "tiledarray/unit/run-np1") + set(_ctest_args_serial -V -R "tiledarray/unit/run-np-1") if (DEFINED TA_UT_CTEST_TIMEOUT) list(APPEND _ctest_args --timeout ${TA_UT_CTEST_TIMEOUT}) list(APPEND _ctest_args_serial --timeout ${TA_UT_CTEST_TIMEOUT}) From e263f997764a72e82cac957540782141e7d4f6e9 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 6 May 2022 10:15:21 -0400 Subject: [PATCH 06/30] ccd example avoids using deprecated DistArray::range() --- examples/cc/ccd.cpp | 15 ++++++++------- examples/cc/ccsd.cpp | 9 +++++---- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/examples/cc/ccd.cpp b/examples/cc/ccd.cpp index 18106f34c1..2560048d26 100644 --- a/examples/cc/ccd.cpp +++ b/examples/cc/ccd.cpp @@ -96,27 +96,28 @@ int main(int argc, char** argv) { TiledArray::TSpArrayD t_aa_vvoo(world, v_aa_vvoo.trange(), v_aa_vvoo.shape()); - for (auto it = t_aa_vvoo.range().begin(); it != t_aa_vvoo.range().end(); - ++it) + for (auto it = t_aa_vvoo.tiles_range().begin(); + it != t_aa_vvoo.tiles_range().end(); ++it) if (t_aa_vvoo.is_local(*it) && (!t_aa_vvoo.is_zero(*it))) t_aa_vvoo.set(*it, 0.0); TiledArray::TSpArrayD t_ab_vvoo(world, v_ab_vvoo.trange(), v_ab_vvoo.shape()); - for (auto it = t_ab_vvoo.range().begin(); it != t_ab_vvoo.range().end(); - ++it) + for (auto it = t_ab_vvoo.tiles_range().begin(); + it != t_ab_vvoo.tiles_range().end(); ++it) if (t_ab_vvoo.is_local(*it) && (!t_ab_vvoo.is_zero(*it))) t_ab_vvoo.set(*it, 0.0); TiledArray::TSpArrayD t_bb_vvoo(world, v_bb_vvoo.trange(), v_bb_vvoo.shape()); - for (auto it = t_bb_vvoo.range().begin(); it != t_bb_vvoo.range().end(); - ++it) + for (auto it = t_bb_vvoo.tiles_range().begin(); + it != t_bb_vvoo.tiles_range().end(); ++it) if (t_bb_vvoo.is_local(*it) && (!t_bb_vvoo.is_zero(*it))) t_bb_vvoo.set(*it, 0.0); TiledArray::TSpArrayD D_vvoo(world, v_ab_vvoo.trange(), v_ab_vvoo.shape()); - for (auto it = D_vvoo.range().begin(); it != D_vvoo.range().end(); ++it) + for (auto it = D_vvoo.tiles_range().begin(); + it != D_vvoo.tiles_range().end(); ++it) if (D_vvoo.is_local(*it) && (!D_vvoo.is_zero(*it))) D_vvoo.set(*it, world.taskq.add(data, &InputData::make_D_vvoo_tile, D_vvoo.trange().make_tile_range(*it))); diff --git a/examples/cc/ccsd.cpp b/examples/cc/ccsd.cpp index 47a29686fa..f06b53edf1 100644 --- a/examples/cc/ccsd.cpp +++ b/examples/cc/ccsd.cpp @@ -128,15 +128,16 @@ int main(int argc, char** argv) { // // // TArray2s D_vo(world, f_a_vo.trange(), f_a_vo.shape()); - // for(TArray2s::range_type::const_iterator it = D_vo.range().begin(); it - // != D_vo.range().end(); ++it) + // for(TArray2s::range_type::const_iterator it = + // D_vo.tiles_range().begin(); it + // != D_vo.tiles_range().end(); ++it) // if(D_vo.is_local(*it) && (! D_vo.is_zero(*it))) // D_vo.set(*it, world.taskq.add(data, & InputData::make_D_vo_tile, // D_vo.trange().make_tile_range(*it))); // // TArray4s D_vvoo(world, v_ab_vvoo.trange(), v_ab_vvoo.shape()); - // for(TArray4s::range_type::const_iterator it = D_vvoo.range().begin(); - // it != D_vvoo.range().end(); ++it) + // for(TArray4s::range_type::const_iterator it = + // D_vvoo.tiles_range().begin(); it != D_vvoo.tiles_range().end(); ++it) // if(D_vvoo.is_local(*it) && (! D_vvoo.is_zero(*it))) // D_vvoo.set(*it, world.taskq.add(data, & // InputData::make_D_vvoo_tile, From e0dfb455850000ff1c0c478a39a755ca2f321293 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 6 May 2022 13:35:55 -0400 Subject: [PATCH 07/30] print nvcc version info and output of nvidia-smi --- ci/.build-project | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/.build-project b/ci/.build-project index 79a08d541b..a9c9f7582a 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -81,6 +81,8 @@ fi if [[ "$vars" =~ \"-D([a-zA-Z]+_)?ENABLE_CUDA=(ON|TRUE|1|YES)\" ]]; then cmd "make -C /home/ValeevGroup install/cuda" cmd "export CUDACXX=/usr/local/cuda/bin/nvcc" + cmd "${CUDACXX} -V" + cmd "find / -name \"*nvidia-smi\"" fi section_end preparing_system_section From c172a9895c450b9b888591130f636f149a6d9f21 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 8 May 2022 22:47:46 -0400 Subject: [PATCH 08/30] vector::data() may not return null ptr if empty --- tests/tensor_um.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/tensor_um.cpp b/tests/tensor_um.cpp index 310e04234f..33efbfd7d4 100644 --- a/tests/tensor_um.cpp +++ b/tests/tensor_um.cpp @@ -87,8 +87,7 @@ struct TensorUMFixture { const TensorUMFixture::range_type TensorUMFixture::r = make_range(81); -BOOST_FIXTURE_TEST_SUITE(tensor_um_suite, TensorUMFixture, - TA_UT_LABEL_SERIAL) +BOOST_FIXTURE_TEST_SUITE(tensor_um_suite, TensorUMFixture, TA_UT_LABEL_SERIAL) BOOST_AUTO_TEST_CASE(default_constructor) { // check constructor @@ -98,7 +97,6 @@ BOOST_AUTO_TEST_CASE(default_constructor) { BOOST_CHECK(x.empty()); // Check that range data is correct - BOOST_CHECK_EQUAL(x.data(), static_cast(NULL)); BOOST_CHECK_EQUAL(x.size(), 0ul); BOOST_CHECK_EQUAL(x.range().volume(), 0ul); From 9c355bd1bba2423e450b48e800c96e8186b2dbf4 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 9 May 2022 10:42:14 -0400 Subject: [PATCH 09/30] bump up UT test timeout to accomodate CUDA builds --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ce49b4dc01..6ab502b527 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,7 +12,7 @@ variables: TA_CONFIG : > CMAKE_BUILD_TYPE=${BUILD_TYPE} TA_ASSERT_POLICY=TA_ASSERT_THROW - TA_UT_CTEST_TIMEOUT=2000 + TA_UT_CTEST_TIMEOUT=3000 ${TA_PYTHON} ${ENABLE_CUDA} ${BLA_VENDOR} From 3c14fa48ec1f792e0dbc2c84c2be51c6ddd14a90 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 13 May 2022 14:42:23 -0400 Subject: [PATCH 10/30] added UT um_expressions_suite/scal_add_block --- tests/expressions_cuda_um.cpp | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/expressions_cuda_um.cpp b/tests/expressions_cuda_um.cpp index 0a9e057f6b..e5e810e29d 100644 --- a/tests/expressions_cuda_um.cpp +++ b/tests/expressions_cuda_um.cpp @@ -520,6 +520,39 @@ BOOST_AUTO_TEST_CASE(scal_block) { } } +BOOST_AUTO_TEST_CASE(scal_add_block) { + Permutation perm({2, 1, 0}); + BlockRange block_range(a.trange().tiles_range(), {3, 3, 3}, {5, 5, 5}); + + BOOST_REQUIRE_NO_THROW(c("a,b,c") = + 2 * (3 * a("a,b,c").block({3, 3, 3}, {5, 5, 5}) + + 4 * b("a,b,c").block({3, 3, 3}, {5, 5, 5}))); + + std::cout << "expr tree for c(\"a,b,c\") =\n" + " 2 * (3 * a(\"a,b,c\").block({3, " + "3, 3}, {5, 5, 5}) +\n" + " 4 * b(\"a,b,c\").block({3, " + "3, 3}, {5, 5, 5})):\n" + << c("a,b,c") + << 2 * (3 * a("a,b,c").block({3, 3, 3}, {5, 5, 5}) + + 4 * b("a,b,c").block({3, 3, 3}, {5, 5, 5})); + + for (std::size_t index = 0ul; index < block_range.volume(); ++index) { + if (!a.is_zero(block_range.ordinal(index)) && + !b.is_zero(block_range.ordinal(index))) { + auto a_tile = a.find(block_range.ordinal(index)).get(); + auto b_tile = b.find(block_range.ordinal(index)).get(); + auto result_tile = c.find(index).get(); + + for (std::size_t j = 0ul; j < result_tile.range().volume(); ++j) { + BOOST_CHECK_EQUAL(result_tile[j], 2 * (3 * a_tile[j] + 4 * b_tile[j])); + } + } else { + BOOST_CHECK(c.is_zero(index)); + } + } +} + BOOST_AUTO_TEST_CASE(permute_block) { Permutation perm({2, 1, 0}); BlockRange block_range(a.trange().tiles_range(), {3, 3, 3}, {5, 5, 5}); @@ -965,6 +998,10 @@ BOOST_AUTO_TEST_CASE(scale_add_permute) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = 5 * (2 * a("c,b,a")) + (3 * b("a,b,c"))); + std::cout << "expr tree for c(\"a,b,c\") = 5 * (2 * a(\"c,b,a\")) + (3 * " + "b(\"a,b,c\")))" + << c("a,b,c") << (5 * (2 * a("c,b,a")) + (3 * b("a,b,c"))); + for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); const size_t perm_index = From 18a68fb21bde20ab6095b7463f5f407474389d83 Mon Sep 17 00:00:00 2001 From: Victor Anisimov Date: Tue, 5 Jul 2022 10:07:07 -0500 Subject: [PATCH 11/30] Integrate CUDA-HIP-SYCL version of LibreTT --- INSTALL.md | 4 +- bin/admin/dependency-versions-update-hook.py | 10 +- examples/cuda/CMakeLists.txt | 2 +- .../cuda/{cuda_cutt.cpp => cuda_librett.cpp} | 2 +- external/cuda.cmake | 4 +- external/{cutt.cmake => librett.cmake} | 97 ++++++------ external/versions.cmake | 4 +- src/CMakeLists.txt | 4 +- src/TiledArray/cuda/btas_um_tensor.h | 4 +- .../external/{cutt.h => ta-librett.h} | 31 ++-- src/TiledArray/tiledarray.cpp | 8 +- tests/CMakeLists.txt | 2 +- tests/{cutt.cpp => librett.cpp} | 141 +++++++++--------- 13 files changed, 161 insertions(+), 152 deletions(-) rename examples/cuda/{cuda_cutt.cpp => cuda_librett.cpp} (98%) rename external/{cutt.cmake => librett.cmake} (53%) rename src/TiledArray/external/{cutt.h => ta-librett.h} (80%) rename tests/{cutt.cpp => librett.cpp} (81%) diff --git a/INSTALL.md b/INSTALL.md index 3606a2bd25..c06535172e 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -64,7 +64,7 @@ Compiling BTAS requires the following prerequisites: Optional prerequisites: - [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on CUDA-enabled accelerators. CUDA 11 or later is required. Support for CUDA also requires the following additional prerequisites, both of which will be built and installed automatically if missing: - - [cuTT](github.com/ValeevGroup/cutt) -- CUDA transpose library; note that our fork of the [original cuTT repo](github.com/ap-hynninen/cutt) is required to provide thread-safety (tag 0e8685bf82910bc7435835f846e88f1b39f47f09). + - [LibreTT](github.com/victor-anisimov/LibreTT) -- free tensor transpose library for CUDA, HIP, and SYCL platforms that is based on the [original cuTT library](github.com/ap-hynninen/cutt) with our additional thread-safety improvements (tag 68abe31a9ec6fd2fd9ffbcd874daa80457f947da). - [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag f9640e0fa4245691cdd434e4f719ac5f7d455f82). - [Doxygen](http://www.doxygen.nl/) -- for building documentation (version 1.8.12 or later). - [ScaLAPACK](http://www.netlib.org/scalapack/) -- a distributed-memory linear algebra package. If detected, the following C++ components will also be sought and downloaded, if missing: @@ -329,7 +329,7 @@ Support for execution on CUDA-enabled hardware is controlled by the following va * `ENABLE_CUDA` -- Set to `ON` to turn on CUDA support. [Default=OFF]. * `CMAKE_CUDA_HOST_COMPILER` -- Set to the path to the host C++ compiler to be used by CUDA compiler. CUDA compilers used to be notorious for only being able to use specific C++ host compilers, but support for more recent C++ host compilers has improved. The default is determined by the CUDA compiler and the user environment variables (`PATH` etc.). * `ENABLE_CUDA_ERROR_CHECK` -- Set to `ON` to turn on assertions for successful completion of calls to CUDA runtime and libraries. [Default=OFF]. -* `CUTT_INSTALL_DIR` -- the installation prefix of the pre-installed cuTT library. This should not be normally needed; it is strongly recommended to let TiledArray build and install cuTT. +* `LIBRETT_INSTALL_DIR` -- the installation prefix of the pre-installed LibreTT library. This should not be normally needed; it is strongly recommended to let TiledArray build and install LibreTT. * `UMPIRE_INSTALL_DIR` -- the installation prefix of the pre-installed Umpire library. This should not be normally needed; it is strongly recommended to let TiledArray build and install Umpire. For the CUDA compiler and toolkit to be discoverable the CUDA compiler (`nvcc`) should be in the `PATH` environment variable. Refer to the [FindCUDAToolkit module](https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html) for more info. diff --git a/bin/admin/dependency-versions-update-hook.py b/bin/admin/dependency-versions-update-hook.py index 19b7123703..686b98b49a 100755 --- a/bin/admin/dependency-versions-update-hook.py +++ b/bin/admin/dependency-versions-update-hook.py @@ -106,11 +106,11 @@ def replace_dep_id(topsrc, file_ext, dep_name, old_id, new_id, search_prefix = ' btas_old_tag = tokens[2] else: btas_new_tag = tokens[2] - elif tokens[1].find('CUTT') != -1: + elif tokens[1].find('LIBRETT') != -1: if tokens[1].find('PREVIOUS') != -1: - cutt_old_tag = tokens[2] + librett_old_tag = tokens[2] else: - cutt_new_tag = tokens[2] + librett_new_tag = tokens[2] elif tokens[1].find('UMPIRE') != -1: if tokens[1].find('PREVIOUS') != -1: umpire_old_tag = tokens[2] @@ -146,8 +146,8 @@ def replace_dep_id(topsrc, file_ext, dep_name, old_id, new_id, search_prefix = ' # BTAS tag in INSTALL.md any_files_changed |= replace_dep_id(topsrc, 'md', 'BTAS', btas_old_tag, btas_new_tag, 'ValeevGroup/BTAS), tag ', '') -# cuTT tag in INSTALL.md -any_files_changed |= replace_dep_id(topsrc, 'md', 'cuTT', cutt_old_tag, cutt_new_tag, '', '') +# LibreTT tag in INSTALL.md +any_files_changed |= replace_dep_id(topsrc, 'md', 'LibreTT', librett_old_tag, librett_new_tag, '', '') # Umpire tag in INSTALL.md any_files_changed |= replace_dep_id(topsrc, 'md', 'Umpire', umpire_old_tag, umpire_new_tag, '', '') diff --git a/examples/cuda/CMakeLists.txt b/examples/cuda/CMakeLists.txt index 2f6affe700..5d7f56c86e 100644 --- a/examples/cuda/CMakeLists.txt +++ b/examples/cuda/CMakeLists.txt @@ -25,7 +25,7 @@ if(CUDA_FOUND) - foreach(_exec cuda_cutt cuda_task ta_dense_cuda ta_cc_abcd_cuda ta_vector_cuda ta_reduce_cuda) + foreach(_exec cuda_librett cuda_task ta_dense_cuda ta_cc_abcd_cuda ta_vector_cuda ta_reduce_cuda) # Add executable add_ta_executable(${_exec} "${_exec}.cpp" "tiledarray") diff --git a/examples/cuda/cuda_cutt.cpp b/examples/cuda/cuda_librett.cpp similarity index 98% rename from examples/cuda/cuda_cutt.cpp rename to examples/cuda/cuda_librett.cpp index edaefc2597..a916bfc729 100644 --- a/examples/cuda/cuda_cutt.cpp +++ b/examples/cuda/cuda_librett.cpp @@ -29,7 +29,7 @@ #include /** - * Test cuTT + * Test LibreTT */ const std::size_t N = 100; diff --git a/external/cuda.cmake b/external/cuda.cmake index 1e5ebd8d60..3b2eb6ce37 100644 --- a/external/cuda.cmake +++ b/external/cuda.cmake @@ -42,6 +42,6 @@ message(STATUS "CMAKE Implicit Link Directories: ${CMAKE_CUDA_IMPLICIT_LINK_DIRE include(external/umpire.cmake) ## -## cuTT +## LibreTT ## -include(external/cutt.cmake) +include(external/librett.cmake) diff --git a/external/cutt.cmake b/external/librett.cmake similarity index 53% rename from external/cutt.cmake rename to external/librett.cmake index dbf4e94f91..a238f3af92 100644 --- a/external/cutt.cmake +++ b/external/librett.cmake @@ -1,48 +1,48 @@ ## -## find cuTT +## find LibreTT ## -find_path(_CUTT_INSTALL_DIR NAMES include/cutt.h lib/libcutt.a HINTS ${CUTT_INSTALL_DIR}) +find_path(_LIBRETT_INSTALL_DIR NAMES include/librett.h lib/librett.a HINTS ${LIBRETT_INSTALL_DIR}) -if( _CUTT_INSTALL_DIR ) +if( _LIBRETT_INSTALL_DIR ) - message(STATUS "cuTT found at ${_CUTT_INSTALL_DIR}") + message(STATUS "LibreTT found at ${_LIBRETT_INSTALL_DIR}") elseif(TA_EXPERT) - message("** cuTT was not found") - message(STATUS "** Downloading and building cuTT is explicitly disabled in EXPERT mode") + message("** LibreTT was not found") + message(STATUS "** Downloading and building LibreTT is explicitly disabled in EXPERT mode") else() - # TODO need to fix the auto installation of cuTT + # TODO need to fix the auto installation of LibreTT include(ExternalProject) # to pass CMAKE_C_* vars to external project enable_language(C) - # set source and build path for cuTT in the TiledArray project - set(EXTERNAL_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/cutt-src) - # cutt only supports in source build - set(EXTERNAL_BUILD_DIR ${FETCHCONTENT_BASE_DIR}/cutt-build) + # set source and build path for LibreTT in the TiledArray project + set(EXTERNAL_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/librett-src) + # librett only supports in source build + set(EXTERNAL_BUILD_DIR ${FETCHCONTENT_BASE_DIR}/librett-build) set(EXTERNAL_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}) - if (NOT CUTT_URL) - set(CUTT_URL https://github.com/ValeevGroup/cutt.git) - endif (NOT CUTT_URL) - if (NOT CUTT_TAG) - set(CUTT_TAG ${TA_TRACKED_CUTT_TAG}) - endif (NOT CUTT_TAG) + if (NOT LIBRETT_URL) + set(LIBRETT_URL https://github.com/victor-anisimov/librett.git) + endif (NOT LIBRETT_URL) + if (NOT LIBRETT_TAG) + set(LIBRETT_TAG ${TA_TRACKED_LIBRETT_TAG}) + endif (NOT LIBRETT_TAG) - message("** Will clone cuTT from ${CUTT_URL}") + message("** Will clone LibreTT from ${LIBRETT_URL}") # need to change the separator of list to avoid issues with ExternalProject parsing # set(CUDA_FLAGS "${CUDA_NVCC_FLAGS}") # string(REPLACE ";" "::" CUDA_FLAGS "${CUDA_NVCC_FLAGS}") #message(STATUS "CUDA_FLAGS: " "${CUDA_FLAGS}") - set(CUTT_CMAKE_ARGS + set(LIBRETT_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_DIR} -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} -DCMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE} @@ -66,87 +66,88 @@ else() -DCMAKE_CUDA_STANDARD=${CMAKE_CUDA_STANDARD} -DCMAKE_CUDA_EXTENSIONS=${CMAKE_CUDA_EXTENSIONS} -DENABLE_UMPIRE=OFF - -DCUTT_USES_THIS_UMPIRE_ALLOCATOR=ThreadSafeUMDynamicPool + -DLIBRETT_USES_THIS_UMPIRE_ALLOCATOR=ThreadSafeUMDynamicPool -DCMAKE_PREFIX_PATH=${_UMPIRE_INSTALL_DIR} -DENABLE_NO_ALIGNED_ALLOC=ON -DCMAKE_CUDA_HOST_COMPILER=${CMAKE_CUDA_HOST_COMPILER} -DCUDA_TOOLKIT_ROOT_DIR=${CUDAToolkit_ROOT} + -DENABLE_CUDA=ON ) if (DEFINED CMAKE_CUDA_ARCHITECTURES) - list(APPEND CUTT_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}) + list(APPEND LIBRETT_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}) endif(DEFINED CMAKE_CUDA_ARCHITECTURES) if (CMAKE_TOOLCHAIN_FILE) - set(CUTT_CMAKE_ARGS "${CUTT_CMAKE_ARGS}" + set(LIBRETT_CMAKE_ARGS "${LIBRETT_CMAKE_ARGS}" "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") endif(CMAKE_TOOLCHAIN_FILE) if (BUILD_SHARED_LIBS) - set(CUTT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(LIBRETT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) else(BUILD_SHARED_LIBS) - set(CUTT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(LIBRETT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) endif(BUILD_SHARED_LIBS) # N.B. Ninja needs spelling out the byproducts of custom targets, see https://cmake.org/cmake/help/v3.3/policy/CMP0058.html - set(CUTT_BUILD_BYPRODUCTS "${EXTERNAL_BUILD_DIR}/src/libcutt${CUTT_DEFAULT_LIBRARY_SUFFIX}") - message(STATUS "custom target cutt is expected to build these byproducts: ${CUTT_BUILD_BYPRODUCTS}") + set(LIBRETT_BUILD_BYPRODUCTS "${EXTERNAL_BUILD_DIR}/src/librett${LIBRETT_DEFAULT_LIBRARY_SUFFIX}") + message(STATUS "custom target librett is expected to build these byproducts: ${LIBRETT_BUILD_BYPRODUCTS}") - ExternalProject_Add(cutt + ExternalProject_Add(librett PREFIX ${CMAKE_INSTALL_PREFIX} - STAMP_DIR ${FETCHCONTENT_BASE_DIR}/cutt-ep-artifacts - TMP_DIR ${FETCHCONTENT_BASE_DIR}/cutt-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable + STAMP_DIR ${FETCHCONTENT_BASE_DIR}/librett-ep-artifacts + TMP_DIR ${FETCHCONTENT_BASE_DIR}/librett-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable #--Download step-------------- DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} - GIT_REPOSITORY ${CUTT_URL} - GIT_TAG ${CUTT_TAG} + GIT_REPOSITORY ${LIBRETT_URL} + GIT_TAG ${LIBRETT_TAG} #--Configure step------------- SOURCE_DIR ${EXTERNAL_SOURCE_DIR} LIST_SEPARATOR :: UPDATE_DISCONNECTED 1 CMAKE_ARGS - ${CUTT_CMAKE_ARGS} + ${LIBRETT_CMAKE_ARGS} ${EXTERNAL_SOURCE_DIR} #--Build step----------------- BINARY_DIR ${EXTERNAL_BUILD_DIR} - BUILD_COMMAND ${CMAKE_COMMAND} --build . --target cutt -v - BUILD_BYPRODUCTS ${CUTT_BUILD_BYPRODUCTS} + BUILD_COMMAND ${CMAKE_COMMAND} --build . --target librett -v + BUILD_BYPRODUCTS ${LIBRETT_BUILD_BYPRODUCTS} #--Install step--------------- - INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "cuTT will be installed during TiledArray's installation." + INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "LibreTT will be installed during TiledArray's installation." #--Custom targets------------- STEP_TARGETS build ) - # TiledArray_CUTT target depends on existence of this directory to be usable from the build tree at configure time + # TiledArray_LIBRETT target depends on existence of this directory to be usable from the build tree at configure time execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${EXTERNAL_SOURCE_DIR}/src") - # do install of cuTT as part of building TiledArray's install target + # do install of LibreTT as part of building TiledArray's install target install(CODE "execute_process( COMMAND \"${CMAKE_COMMAND}\" \"--build\" \".\" \"--target\" \"install\" WORKING_DIRECTORY \"${EXTERNAL_BUILD_DIR}\" RESULT_VARIABLE error_code) if(error_code) - message(FATAL_ERROR \"Failed to install cuTT\") + message(FATAL_ERROR \"Failed to install LibreTT\") endif() ") - # Add cuTT dependency to External - add_dependencies(External-tiledarray cutt-build) + # Add LibreTT dependency to External + add_dependencies(External-tiledarray librett-build) - set(_CUTT_INSTALL_DIR ${EXTERNAL_INSTALL_DIR}) + set(_LIBRETT_INSTALL_DIR ${EXTERNAL_INSTALL_DIR}) -endif(_CUTT_INSTALL_DIR) +endif(_LIBRETT_INSTALL_DIR) -add_library(TiledArray_CUTT INTERFACE) +add_library(TiledArray_LIBRETT INTERFACE) -set_target_properties(TiledArray_CUTT +set_target_properties(TiledArray_LIBRETT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "$;$" + "$;$" INTERFACE_LINK_LIBRARIES - "$;$" + "$;$" ) -install(TARGETS TiledArray_CUTT EXPORT tiledarray COMPONENT tiledarray) +install(TARGETS TiledArray_LIBRETT EXPORT tiledarray COMPONENT tiledarray) -#TODO test cuTT +#TODO test LibreTT diff --git a/external/versions.cmake b/external/versions.cmake index 4ac855e249..c1120147d9 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -27,8 +27,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) set(TA_TRACKED_BTAS_TAG 242871710dabd5ef337e5253000d3e38c1d977ba) set(TA_TRACKED_BTAS_PREVIOUS_TAG db884b020b5c13c312c07df9d5c03cea2d65afb2) -set(TA_TRACKED_CUTT_TAG 0e8685bf82910bc7435835f846e88f1b39f47f09) -set(TA_TRACKED_CUTT_PREVIOUS_TAG 592198b93c93b7ca79e7900b9a9f2e79f9dafec3) +set(TA_TRACKED_LIBRETT_TAG 68abe31a9ec6fd2fd9ffbcd874daa80457f947da) +set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 7e27ac766a9038df6aa05613784a54a036c4b796) set(TA_TRACKED_UMPIRE_TAG f9640e0fa4245691cdd434e4f719ac5f7d455f82) set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v6.0.0) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f5ed90793b..d6f055df8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -197,7 +197,7 @@ if(CUDA_FOUND) list(APPEND TILEDARRAY_HEADER_FILES TiledArray/external/cuda.h - TiledArray/external/cutt.h + TiledArray/external/ta-librett.h TiledArray/cuda/cublas.h TiledArray/cuda/btas_cublas.h TiledArray/cuda/btas_um_tensor.h @@ -245,7 +245,7 @@ if(CUDA_FOUND) LANGUAGE CUDA) # the list of libraries on which TiledArray depends on - list(APPEND _TILEDARRAY_DEPENDENCIES CUDA::cublas CUDA::nvToolsExt TiledArray_CUTT) + list(APPEND _TILEDARRAY_DEPENDENCIES CUDA::cublas CUDA::nvToolsExt TiledArray_LIBRETT) endif(CUDA_FOUND) diff --git a/src/TiledArray/cuda/btas_um_tensor.h b/src/TiledArray/cuda/btas_um_tensor.h index d6012f00f1..2ec1fb9a6d 100644 --- a/src/TiledArray/cuda/btas_um_tensor.h +++ b/src/TiledArray/cuda/btas_um_tensor.h @@ -32,7 +32,7 @@ #include #include -#include +#include #include namespace TiledArray { @@ -187,7 +187,7 @@ btasUMTensorVarray permute(const btasUMTensorVarray &arg, std::move(storage)); // invoke the permute function - cutt_permute(const_cast(device_data(arg.storage())), + librett_permute(const_cast(device_data(arg.storage())), device_data(result.storage()), arg.range(), perm, stream); synchronize_stream(&stream); diff --git a/src/TiledArray/external/cutt.h b/src/TiledArray/external/ta-librett.h similarity index 80% rename from src/TiledArray/external/cutt.h rename to src/TiledArray/external/ta-librett.h index a2a31ec20d..bc0da4de8a 100644 --- a/src/TiledArray/external/cutt.h +++ b/src/TiledArray/external/ta-librett.h @@ -21,8 +21,8 @@ * */ -#ifndef TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED -#define TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED +#ifndef TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED +#define TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED #include @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include @@ -77,38 +77,39 @@ inline void permutation_to_col_major(std::vector& perm) { * @param stream the CUDA stream this permutation will be submitted to */ template -void cutt_permute(T* inData, T* outData, const TiledArray::Range& range, +void librett_permute(T* inData, T* outData, const TiledArray::Range& range, const TiledArray::Permutation& perm, cudaStream_t stream) { auto extent = range.extent(); std::vector extent_int(extent.begin(), extent.end()); - // cuTT uses FROM notation + // LibreTT uses FROM notation auto perm_inv = perm.inv(); std::vector perm_int(perm_inv.begin(), perm_inv.end()); - // cuTT uses ColMajor + // LibreTT uses ColMajor TiledArray::extent_to_col_major(extent_int); TiledArray::permutation_to_col_major(perm_int); - cuttResult_t status; + //librettResult_t status; + librettResult status; - cuttHandle plan; - status = cuttPlan(&plan, range.rank(), extent_int.data(), perm_int.data(), + librettHandle plan; + status = librettPlan(&plan, range.rank(), extent_int.data(), perm_int.data(), sizeof(T), stream); - TA_ASSERT(status == CUTT_SUCCESS); + TA_ASSERT(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, inData, outData); + status = librettExecute(plan, inData, outData); - TA_ASSERT(status == CUTT_SUCCESS); + TA_ASSERT(status == LIBRETT_SUCCESS); - status = cuttDestroy(plan); + status = librettDestroy(plan); - TA_ASSERT(status == CUTT_SUCCESS); + TA_ASSERT(status == LIBRETT_SUCCESS); } } // namespace TiledArray #endif // TILEDARRAY_HAS_CUDA -#endif // TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED +#endif // TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED diff --git a/src/TiledArray/tiledarray.cpp b/src/TiledArray/tiledarray.cpp index 29b60a61d6..226d2365ac 100644 --- a/src/TiledArray/tiledarray.cpp +++ b/src/TiledArray/tiledarray.cpp @@ -7,7 +7,7 @@ #ifdef TILEDARRAY_HAS_CUDA #include #include -#include +#include #endif namespace TiledArray { @@ -20,14 +20,14 @@ inline void cuda_initialize() { cudaEnv::instance(); // cuBLASHandlePool::handle(); - // initialize cuTT - cuttInitialize(); + // initialize LibreTT + librettInitialize(); } /// finalize cuda environment inline void cuda_finalize() { CudaSafeCall(cudaDeviceSynchronize()); - cuttFinalize(); + librettFinalize(); cublasDestroy(cuBLASHandlePool::handle()); delete &cuBLASHandlePool::handle(); cudaEnv::instance().reset(nullptr); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0fccf921b5..1ac73df189 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -116,7 +116,7 @@ set(ta_test_src_files ta_test.cpp ) if(CUDA_FOUND) - list(APPEND ta_test_src_files cutt.cpp expressions_cuda_um.cpp tensor_um.cpp) + list(APPEND ta_test_src_files librett.cpp expressions_cuda_um.cpp tensor_um.cpp) endif() # if tiledarray library was compiled without exceptions, use TA header-only (see below) diff --git a/tests/cutt.cpp b/tests/librett.cpp similarity index 81% rename from tests/cutt.cpp rename to tests/librett.cpp index 8a6b1af539..91c5b5b8ad 100644 --- a/tests/cutt.cpp +++ b/tests/librett.cpp @@ -27,8 +27,8 @@ #include #include "unit_test_config.h" -struct cuTTFixture { - // cuTTFixture() +struct LibreTTFixture { + // LibreTTFixture() // : A(100), // B(50), // C(20), @@ -36,16 +36,16 @@ struct cuTTFixture { // extent({100, 100}), // extent_nonsym({100, 50}), // perm({1, 0}) {} - cuTTFixture() : A(10), B(5), C(2) {} + LibreTTFixture() : A(10), B(5), C(2) {} int A; int B; int C; }; -BOOST_FIXTURE_TEST_SUITE(cutt_suite, cuTTFixture, TA_UT_LABEL_SERIAL); +BOOST_FIXTURE_TEST_SUITE(librett_suite, LibreTTFixture, TA_UT_LABEL_SERIAL); -BOOST_AUTO_TEST_CASE(cutt_gpu_mem) { +BOOST_AUTO_TEST_CASE(librett_gpu_mem) { int* a_host = (int*)std::malloc(A * A * sizeof(int)); int* b_host = (int*)std::malloc(A * A * sizeof(int)); int iter = 0; @@ -68,17 +68,18 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; - status = cuttPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_device, b_device); + status = librettExecute(plan, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); - cuttDestroy(plan); + BOOST_CHECK(status == LIBRETT_SUCCESS); + librettDestroy(plan); cudaMemcpy(b_host, b_device, A * A * sizeof(int), cudaMemcpyDeviceToHost); @@ -97,7 +98,7 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem) { cudaFree(b_device); } -BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym) { +BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym) { int* a_host = (int*)std::malloc(A * B * sizeof(int)); int* b_host = (int*)std::malloc(A * B * sizeof(int)); int iter = 0; @@ -115,8 +116,9 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym) { cudaMemcpy(a_device, a_host, A * B * sizeof(int), cudaMemcpyHostToDevice); - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent({B, A}); TiledArray::extent_to_col_major(extent); @@ -124,14 +126,14 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = cuttPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_device, b_device); + status = librettExecute(plan, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); - cuttDestroy(plan); + BOOST_CHECK(status == LIBRETT_SUCCESS); + librettDestroy(plan); cudaMemcpy(b_host, b_device, A * B * sizeof(int), cudaMemcpyDeviceToHost); @@ -150,7 +152,7 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym) { cudaFree(b_device); } -BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_column_major) { +BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_column_major) { int* a_host = (int*)std::malloc(A * B * C * sizeof(int)); int* b_host = (int*)std::malloc(A * B * C * sizeof(int)); int iter = 0; @@ -172,28 +174,29 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_column_major) { // b(j,i,k) = a(i,j,k) - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent3{int(A), int(B), int(C)}; std::vector perm3{1, 0, 2}; // std::vector perm3{0, 2, 1}; - status = cuttPlanMeasure(&plan, 3, extent3.data(), perm3.data(), sizeof(int), + status = librettPlanMeasure(&plan, 3, extent3.data(), perm3.data(), sizeof(int), 0, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_device, b_device); + status = librettExecute(plan, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); cudaMemcpy(b_host, b_device, A * B * C * sizeof(int), cudaMemcpyDeviceToHost); - status = cuttDestroy(plan); + status = librettDestroy(plan); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); iter = 0; for (std::size_t k = 0; k < C; k++) { @@ -212,7 +215,7 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_column_major) { cudaFree(b_device); } -BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_row_major) { +BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_row_major) { int* a_host = (int*)std::malloc(A * B * C * sizeof(int)); int* b_host = (int*)std::malloc(A * B * C * sizeof(int)); int iter = 0; @@ -234,8 +237,9 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_row_major) { // b(j,i,k) = a(i,j,k) - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent({A, B, C}); TiledArray::extent_to_col_major(extent); @@ -243,20 +247,20 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_row_major) { std::vector perm({1, 0, 2}); TiledArray::permutation_to_col_major(perm); - status = cuttPlanMeasure(&plan, 3, extent.data(), perm.data(), sizeof(int), 0, + status = librettPlanMeasure(&plan, 3, extent.data(), perm.data(), sizeof(int), 0, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_device, b_device); + status = librettExecute(plan, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); cudaMemcpy(b_host, b_device, A * B * C * sizeof(int), cudaMemcpyDeviceToHost); - status = cuttDestroy(plan); + status = librettDestroy(plan); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); iter = 0; for (std::size_t i = 0; i < A; i++) { @@ -275,7 +279,7 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_row_major) { cudaFree(b_device); } -BOOST_AUTO_TEST_CASE(cutt_unified_mem) { +BOOST_AUTO_TEST_CASE(librett_unified_mem) { int* a_um; cudaMallocManaged(&a_um, A * A * sizeof(int)); @@ -290,8 +294,9 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem) { } } - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent({A, A}); TiledArray::extent_to_col_major(extent); @@ -299,15 +304,15 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = cuttPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_um, b_um); + status = librettExecute(plan, a_um, b_um); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - cuttDestroy(plan); + librettDestroy(plan); cudaDeviceSynchronize(); @@ -323,7 +328,7 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem) { cudaFree(b_um); } -BOOST_AUTO_TEST_CASE(cutt_unified_mem_nonsym) { +BOOST_AUTO_TEST_CASE(librett_unified_mem_nonsym) { int* a_um; cudaMallocManaged(&a_um, A * B * sizeof(int)); @@ -338,8 +343,9 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_nonsym) { } } - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent({B, A}); TiledArray::extent_to_col_major(extent); @@ -347,15 +353,15 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_nonsym) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = cuttPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_um, b_um); + status = librettExecute(plan, a_um, b_um); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - cuttDestroy(plan); + librettDestroy(plan); cudaDeviceSynchronize(); iter = 0; @@ -369,7 +375,7 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_nonsym) { cudaFree(b_um); } -BOOST_AUTO_TEST_CASE(cutt_unified_mem_rank_three) { +BOOST_AUTO_TEST_CASE(librett_unified_mem_rank_three) { int* a_um; cudaMallocManaged(&a_um, A * B * C * sizeof(int)); @@ -386,8 +392,9 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_rank_three) { } } - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; // b(k,i,j) = a(i,j,k) @@ -397,15 +404,15 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_rank_three) { std::vector perm({2, 0, 1}); TiledArray::permutation_to_col_major(perm); - status = cuttPlan(&plan, 3, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 3, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_um, b_um); + status = librettExecute(plan, a_um, b_um); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - cuttDestroy(plan); + librettDestroy(plan); cudaDeviceSynchronize(); iter = 0; @@ -421,7 +428,7 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_rank_three) { cudaFree(b_um); } -BOOST_AUTO_TEST_CASE(cutt_um_tensor) { +BOOST_AUTO_TEST_CASE(librett_um_tensor) { TiledArray::Range range{A, A}; using Tile = TiledArray::btasUMTensorVarray; @@ -453,7 +460,7 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor) { } } -BOOST_AUTO_TEST_CASE(cutt_um_tensor_nonsym) { +BOOST_AUTO_TEST_CASE(librett_um_tensor_nonsym) { TiledArray::Range range{B, A}; using Tile = TiledArray::btasUMTensorVarray; @@ -485,7 +492,7 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor_nonsym) { } } -BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_three) { +BOOST_AUTO_TEST_CASE(librett_um_tensor_rank_three) { TiledArray::Range range{A, B, C}; using Tile = TiledArray::btasUMTensorVarray; @@ -540,7 +547,7 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_three) { } } -BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_four) { +BOOST_AUTO_TEST_CASE(librett_um_tensor_rank_four) { std::size_t a = 2; std::size_t b = 3; std::size_t c = 6; @@ -609,7 +616,7 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_four) { } } -BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_six) { +BOOST_AUTO_TEST_CASE(librett_um_tensor_rank_six) { std::size_t a = 2; std::size_t b = 3; std::size_t c = 6; From ae1f4038475c6d6af755469b7f4d848e8e9cbb9e Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 13 Jul 2022 14:29:14 -0400 Subject: [PATCH 12/30] ta-librett.h -> librett.h --- src/CMakeLists.txt | 2 +- src/TiledArray/cuda/btas_um_tensor.h | 119 +++++++++++------- .../external/{ta-librett.h => librett.h} | 6 +- 3 files changed, 79 insertions(+), 48 deletions(-) rename src/TiledArray/external/{ta-librett.h => librett.h} (95%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d6f055df8f..04281d4926 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -197,7 +197,7 @@ if(CUDA_FOUND) list(APPEND TILEDARRAY_HEADER_FILES TiledArray/external/cuda.h - TiledArray/external/ta-librett.h + TiledArray/external/librett.h TiledArray/cuda/cublas.h TiledArray/cuda/btas_cublas.h TiledArray/cuda/btas_um_tensor.h diff --git a/src/TiledArray/cuda/btas_um_tensor.h b/src/TiledArray/cuda/btas_um_tensor.h index 2ec1fb9a6d..7bddc4a178 100644 --- a/src/TiledArray/cuda/btas_um_tensor.h +++ b/src/TiledArray/cuda/btas_um_tensor.h @@ -32,7 +32,7 @@ #include #include -#include +#include #include namespace TiledArray { @@ -95,7 +95,8 @@ namespace TiledArray { /// gemm /// -template >> +template >> btasUMTensorVarray gemm( const btasUMTensorVarray &left, const btasUMTensorVarray &right, Scalar factor, @@ -103,7 +104,8 @@ btasUMTensorVarray gemm( return btas_tensor_gemm_cuda_impl(left, right, factor, gemm_helper); } -template >> +template >> void gemm(btasUMTensorVarray &result, const btasUMTensorVarray &left, const btasUMTensorVarray &right, Scalar factor, @@ -159,8 +161,8 @@ btasUMTensorVarray shift(const btasUMTensorVarray &arg, /// shift to /// template -btasUMTensorVarray& shift_to(btasUMTensorVarray &arg, - const Index &range_shift) { +btasUMTensorVarray &shift_to(btasUMTensorVarray &arg, + const Index &range_shift) { const_cast(arg.range()).inplace_shift(range_shift); return arg; } @@ -188,7 +190,7 @@ btasUMTensorVarray permute(const btasUMTensorVarray &arg, // invoke the permute function librett_permute(const_cast(device_data(arg.storage())), - device_data(result.storage()), arg.range(), perm, stream); + device_data(result.storage()), arg.range(), perm, stream); synchronize_stream(&stream); @@ -199,24 +201,29 @@ btasUMTensorVarray permute(const btasUMTensorVarray &arg, /// scale /// -template >> +template >> btasUMTensorVarray scale(const btasUMTensorVarray &arg, const Scalar factor) { detail::to_cuda(arg); return btas_tensor_scale_cuda_impl(arg, factor); } -template >> -btasUMTensorVarray& scale_to(btasUMTensorVarray &arg, const Scalar factor) { +template >> +btasUMTensorVarray &scale_to(btasUMTensorVarray &arg, + const Scalar factor) { detail::to_cuda(arg); btas_tensor_scale_to_cuda_impl(arg, factor); return arg; } -template && TiledArray::detail::is_permutation_v>> +template < + typename T, typename Range, typename Scalar, typename Perm, + typename = std::enable_if_t && + TiledArray::detail::is_permutation_v>> btasUMTensorVarray scale(const btasUMTensorVarray &arg, - const Scalar factor, - const Perm &perm) { + const Scalar factor, const Perm &perm) { auto result = scale(arg, factor); // wait to finish before switch stream @@ -236,7 +243,9 @@ btasUMTensorVarray neg(const btasUMTensorVarray &arg) { return btas_tensor_scale_cuda_impl(arg, T(-1.0)); } -template >> +template < + typename T, typename Range, typename Perm, + typename = std::enable_if_t>> btasUMTensorVarray neg(const btasUMTensorVarray &arg, const Perm &perm) { auto result = neg(arg); @@ -249,7 +258,7 @@ btasUMTensorVarray neg(const btasUMTensorVarray &arg, } template -btasUMTensorVarray& neg_to(btasUMTensorVarray &arg) { +btasUMTensorVarray &neg_to(btasUMTensorVarray &arg) { detail::to_cuda(arg); btas_tensor_scale_to_cuda_impl(arg, T(-1.0)); return arg; @@ -267,7 +276,8 @@ btasUMTensorVarray subt(const btasUMTensorVarray &arg1, return btas_tensor_subt_cuda_impl(arg1, arg2, T(1.0)); } -template >> +template >> btasUMTensorVarray subt(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Scalar factor) { @@ -276,7 +286,9 @@ btasUMTensorVarray subt(const btasUMTensorVarray &arg1, return result; } -template >> +template < + typename T, typename Range, typename Perm, + typename = std::enable_if_t>> btasUMTensorVarray subt(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Perm &perm) { @@ -289,11 +301,13 @@ btasUMTensorVarray subt(const btasUMTensorVarray &arg1, return permute(result, perm); } -template && TiledArray::detail::is_permutation_v>> +template < + typename T, typename Scalar, typename Range, typename Perm, + typename = std::enable_if_t && + TiledArray::detail::is_permutation_v>> btasUMTensorVarray subt(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, - const Scalar factor, - const Perm &perm) { + const Scalar factor, const Perm &perm) { auto result = subt(arg1, arg2, factor); // wait to finish before switch stream @@ -308,17 +322,20 @@ btasUMTensorVarray subt(const btasUMTensorVarray &arg1, /// template -btasUMTensorVarray& subt_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg1) { +btasUMTensorVarray &subt_to( + btasUMTensorVarray &result, + const btasUMTensorVarray &arg1) { detail::to_cuda(result); detail::to_cuda(arg1); btas_tensor_subt_to_cuda_impl(result, arg1, T(1.0)); return result; } -template >> -btasUMTensorVarray& subt_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg1, const Scalar factor) { +template >> +btasUMTensorVarray &subt_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg1, + const Scalar factor) { subt_to(result, arg1); btas_tensor_scale_to_cuda_impl(result, factor); return result; @@ -336,7 +353,8 @@ btasUMTensorVarray add(const btasUMTensorVarray &arg1, return btas_tensor_add_cuda_impl(arg1, arg2, T(1.0)); } -template >> +template >> btasUMTensorVarray add(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Scalar factor) { @@ -345,11 +363,13 @@ btasUMTensorVarray add(const btasUMTensorVarray &arg1, return result; } -template && TiledArray::detail::is_permutation_v>> +template < + typename T, typename Scalar, typename Range, typename Perm, + typename = std::enable_if_t && + TiledArray::detail::is_permutation_v>> btasUMTensorVarray add(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, - const Scalar factor, - const Perm &perm) { + const Scalar factor, const Perm &perm) { auto result = add(arg1, arg2, factor); // wait to finish before switch stream @@ -359,7 +379,9 @@ btasUMTensorVarray add(const btasUMTensorVarray &arg1, return permute(result, perm); } -template >> +template < + typename T, typename Range, typename Perm, + typename = std::enable_if_t>> btasUMTensorVarray add(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Perm &perm) { @@ -377,17 +399,19 @@ btasUMTensorVarray add(const btasUMTensorVarray &arg1, /// template -btasUMTensorVarray& add_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg) { +btasUMTensorVarray &add_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg) { detail::to_cuda(result); detail::to_cuda(arg); btas_tensor_add_to_cuda_impl(result, arg, T(1.0)); return result; } -template >> -btasUMTensorVarray& add_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg, const Scalar factor) { +template >> +btasUMTensorVarray &add_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg, + const Scalar factor) { add_to(result, arg); btas_tensor_scale_to_cuda_impl(result, factor); return result; @@ -416,7 +440,8 @@ btasUMTensorVarray mult(const btasUMTensorVarray &arg1, return btas_tensor_mult_cuda_impl(arg1, arg2); } -template >> +template >> btasUMTensorVarray mult(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Scalar factor) { @@ -425,7 +450,9 @@ btasUMTensorVarray mult(const btasUMTensorVarray &arg1, return result; } -template >> +template < + typename T, typename Range, typename Perm, + typename = std::enable_if_t>> btasUMTensorVarray mult(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Perm &perm) { @@ -438,11 +465,13 @@ btasUMTensorVarray mult(const btasUMTensorVarray &arg1, return permute(result, perm); } -template && TiledArray::detail::is_permutation_v>> +template < + typename T, typename Range, typename Scalar, typename Perm, + typename = std::enable_if_t && + TiledArray::detail::is_permutation_v>> btasUMTensorVarray mult(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, - const Scalar factor, - const Perm &perm) { + const Scalar factor, const Perm &perm) { auto result = mult(arg1, arg2, factor); // wait to finish before switch stream @@ -456,17 +485,19 @@ btasUMTensorVarray mult(const btasUMTensorVarray &arg1, /// mult to /// template -btasUMTensorVarray& mult_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg) { +btasUMTensorVarray &mult_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg) { detail::to_cuda(result); detail::to_cuda(arg); btas_tensor_mult_to_cuda_impl(result, arg); return result; } -template >> -btasUMTensorVarray& mult_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg, const Scalar factor) { +template >> +btasUMTensorVarray &mult_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg, + const Scalar factor) { mult_to(result, arg); btas_tensor_scale_to_cuda_impl(result, factor); return result; diff --git a/src/TiledArray/external/ta-librett.h b/src/TiledArray/external/librett.h similarity index 95% rename from src/TiledArray/external/ta-librett.h rename to src/TiledArray/external/librett.h index bc0da4de8a..46d116c45b 100644 --- a/src/TiledArray/external/ta-librett.h +++ b/src/TiledArray/external/librett.h @@ -78,7 +78,7 @@ inline void permutation_to_col_major(std::vector& perm) { */ template void librett_permute(T* inData, T* outData, const TiledArray::Range& range, - const TiledArray::Permutation& perm, cudaStream_t stream) { + const TiledArray::Permutation& perm, cudaStream_t stream) { auto extent = range.extent(); std::vector extent_int(extent.begin(), extent.end()); @@ -90,12 +90,12 @@ void librett_permute(T* inData, T* outData, const TiledArray::Range& range, TiledArray::extent_to_col_major(extent_int); TiledArray::permutation_to_col_major(perm_int); - //librettResult_t status; + // librettResult_t status; librettResult status; librettHandle plan; status = librettPlan(&plan, range.rank(), extent_int.data(), perm_int.data(), - sizeof(T), stream); + sizeof(T), stream); TA_ASSERT(status == LIBRETT_SUCCESS); From 25d456ede822dbd8c5a0f90775b79f7b06960ced Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 13 Jul 2022 14:31:45 -0400 Subject: [PATCH 13/30] minor verbiage fix --- INSTALL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL.md b/INSTALL.md index c06535172e..683a684f0b 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -64,7 +64,7 @@ Compiling BTAS requires the following prerequisites: Optional prerequisites: - [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on CUDA-enabled accelerators. CUDA 11 or later is required. Support for CUDA also requires the following additional prerequisites, both of which will be built and installed automatically if missing: - - [LibreTT](github.com/victor-anisimov/LibreTT) -- free tensor transpose library for CUDA, HIP, and SYCL platforms that is based on the [original cuTT library](github.com/ap-hynninen/cutt) with our additional thread-safety improvements (tag 68abe31a9ec6fd2fd9ffbcd874daa80457f947da). + - [LibreTT](github.com/victor-anisimov/LibreTT) -- free tensor transpose library for CUDA, HIP, and SYCL platforms that is based on the [original cuTT library](github.com/ap-hynninen/cutt) extended to provide thread-safety improvements (via github.com/ValeevGroup/cutt) and extended to non-CUDA platforms by [@victor-anisimov](github.com/victor-anisimov) (tag 68abe31a9ec6fd2fd9ffbcd874daa80457f947da). - [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag f9640e0fa4245691cdd434e4f719ac5f7d455f82). - [Doxygen](http://www.doxygen.nl/) -- for building documentation (version 1.8.12 or later). - [ScaLAPACK](http://www.netlib.org/scalapack/) -- a distributed-memory linear algebra package. If detected, the following C++ components will also be sought and downloaded, if missing: From c94a08551749ed7db29557d9043053224f823ac7 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 13 Jul 2022 15:23:16 -0400 Subject: [PATCH 14/30] removed all Travis artifacts --- .travis.yml | 111 ------------- INSTALL.md | 2 - README.md | 1 - bin/build-boost-linux.sh | 41 ----- bin/build-eigen3-linux.sh | 42 ----- bin/build-linux.sh | 147 ------------------ bin/build-madness-linux.sh | 85 ---------- bin/build-mpich-linux.sh | 42 ----- bin/build-scalapack-mpich-linux.sh | 48 ------ bin/deploy-linux.sh | 62 -------- bin/docker-cuda.md | 2 +- bin/docker-travis-build.sh | 93 ----------- bin/docker-travis.md | 33 ---- bin/docker.md | 2 +- .../contrib/Travis-CI-Administration-Notes.md | 14 +- 15 files changed, 5 insertions(+), 720 deletions(-) delete mode 100644 .travis.yml delete mode 100755 bin/build-boost-linux.sh delete mode 100755 bin/build-eigen3-linux.sh delete mode 100755 bin/build-linux.sh delete mode 100755 bin/build-madness-linux.sh delete mode 100755 bin/build-mpich-linux.sh delete mode 100755 bin/build-scalapack-mpich-linux.sh delete mode 100755 bin/deploy-linux.sh delete mode 100755 bin/docker-travis-build.sh delete mode 100644 bin/docker-travis.md diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0bf6535c4a..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,111 +0,0 @@ -# See http://about.travis-ci.org/docs/user/build-configuration/ -# To validate this file: http://lint.travis-ci.org/ - -language: cpp -dist: focal -cache: ccache -cache: - directories: - - /home/travis/_install -os: linux - -addons: - apt: - packages: &base_packages - - libblas-dev - - liblapack-dev - - liblapacke-dev - - libtbb-dev - - lcov - - python3 - - python3-pip - - python3-pytest - - python3-numpy - -env: - global: - - BUILD_PREFIX=/home/travis/_build - - INSTALL_PREFIX=/home/travis/_install - -matrix: - fast_finish: true - include: - - compiler: gcc - env: GCC_VERSION=7 BUILD_TYPE=Debug MADNESS_OVER_PARSEC=1 - addons: - apt: - packages: - - *base_packages - - g++-7 - - gfortran-7 - - compiler: gcc - env: GCC_VERSION=7 BUILD_TYPE=Debug - addons: - apt: - packages: - - *base_packages - - g++-7 - - gfortran-7 - - compiler: gcc - env: GCC_VERSION=7 BUILD_TYPE=Release - addons: - apt: - packages: - - *base_packages - - g++-7 - - gfortran-7 - - compiler: gcc - env: GCC_VERSION=8 BUILD_TYPE=Debug COMPUTE_COVERAGE=1 MADNESS_OVER_PARSEC=1 - addons: - apt: - packages: - - *base_packages - - g++-8 - - gfortran-8 - - compiler: gcc - env: GCC_VERSION=8 BUILD_TYPE=Release - addons: - apt: - packages: - - *base_packages - - g++-8 - - gfortran-8 - - compiler: gcc - env: GCC_VERSION=9 BUILD_TYPE=Debug MADNESS_OVER_PARSEC=1 - addons: - apt: - sources: - - sourceline: 'ppa:ubuntu-toolchain-r/test' - packages: - - *base_packages - - g++-9 - - gfortran-9 - -before_install: - - env - - mkdir -p ${BUILD_PREFIX} && mkdir -p ${INSTALL_PREFIX} -# use timeout to stop long-running (i.e. cache-rebuilding) jobs right before they get killed by Travis-CI -# in case of timeout report success to Travis to force cache upload -script: - - travis_wait 50 timeout 2850 ${TRAVIS_BUILD_DIR}/bin/build-$TRAVIS_OS_NAME.sh; RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 124 ]; then true; else false; fi; -after_failure: - - cat ${BUILD_PREFIX}/TA/external/madness-build/CMakeFiles/CMakeError.log - - cat ${BUILD_PREFIX}/TA/external/madness-build/CMakeFiles/CMakeOutput.log - - cat ${BUILD_PREFIX}/TA/CMakeFiles/CMakeError.log - - cat ${BUILD_PREFIX}/TA/CMakeFiles/CMakeOutput.log -# codecov -after_success: - # create report - - cd ${TRAVIS_BUILD_DIR} - - if [ "$COMPUTE_COVERAGE" = "1" ]; then lcov --gcov-tool gcov-${GCC_VERSION} --directory ${BUILD_PREFIX}/TA --capture --output-file coverage.info; fi; # capture coverage info - - if [ "$COMPUTE_COVERAGE" = "1" ]; then lcov --remove coverage.info '/usr/*' '*/madness/*' '*/btas/*' '*/tests/*' --output-file coverage.info; fi; # filter out non-project files - - if [ "$COMPUTE_COVERAGE" = "1" ]; then lcov --list coverage.info; fi; #debug info - - echo ${TRAVIS_CMD} - # upload report to CodeCov - - if [ "$COMPUTE_COVERAGE" = "1" ]; then bash <(curl -s https://codecov.io/bash) -t token; fi; - # deploy artifacts: currently only dox - - if [ "$DEPLOY" = "1" ]; then bash ${TRAVIS_BUILD_DIR}/bin/deploy-$TRAVIS_OS_NAME.sh; fi; - -notifications: - slack: - secure: aSmy6FmiEf+0gcbVpJs0GIrmpI1dF7/WFOXgUkM2wLxw5DBQxE4LW/yt01mvFqAMJLe0LzGujx/V/z98i0kA1S8DEMTqJ+IG2bbdmgb5CAw5LTP5Air1P2SeAyKW/eAAsnGsERaEnHj8nnZEa2dhbAFOPD5QDM7nwWG/xUkIGMU= diff --git a/INSTALL.md b/INSTALL.md index 683a684f0b..579f92f28d 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -30,8 +30,6 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - [Apple Clang](https://en.wikipedia.org/wiki/Xcode), version 9.3 or higher - [Intel C++ compiler](https://software.intel.com/en-us/c-compilers), version 19 or higher - See the current [Travis CI matrix](.travis.yml) for the most up-to-date list of compilers that are known to work. - - [CMake](https://cmake.org/), version 3.15 or higher; if CUDA support is needed, CMake 3.18 or higher is required. - [Git](https://git-scm.com/) 1.8 or later (required to obtain TiledArray and MADNESS source code from GitHub) - [Eigen](http://eigen.tuxfamily.org/), version 3.3.5 or higher; if CUDA is enabled then 3.3.7 is required (will be downloaded automatically, if missing) diff --git a/README.md b/README.md index 853629526a..8742d1e774 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -[![Travis Build Status](https://travis-ci.com/ValeevGroup/tiledarray.svg?branch=master)](https://travis-ci.com/ValeevGroup/tiledarray) [![Gitlab Pipeline Status](https://gitlab.com/ValeevGroup/tiledarray/badges/master/pipeline.svg)](https://gitlab.com/ValeevGroup/tiledarray/-/pipelines?page=1&scope=all&ref=master) [![codecov](https://codecov.io/gh/ValeevGroup/tiledarray/branch/master/graph/badge.svg)](https://codecov.io/gh/ValeevGroup/tiledarray) diff --git a/bin/build-boost-linux.sh b/bin/build-boost-linux.sh deleted file mode 100755 index 7c4fca8bbf..0000000000 --- a/bin/build-boost-linux.sh +++ /dev/null @@ -1,41 +0,0 @@ -#! /bin/sh - -export BOOST_VERSION=1_74_0 - -# Exit on error -set -ev - -if [ "$CXX" = "g++" ]; then - export CXX=/usr/bin/g++-$GCC_VERSION - export CXXFLAGS="-mno-avx" - export BOOST_TOOLSET=gcc -else - export CXX=/usr/bin/clang++-$CLANG_VERSION - export CXXFLAGS="-mno-avx -stdlib=libc++" - export BOOST_TOOLSET=clang -fi - -if [ "X$BUILD_TYPE" = "XDebug" ]; then - export BOOST_VARIANT="debug" -else - export BOOST_VARIANT="release" -fi - -# download+unpack (but not build!) Boost unless previous install is cached ... must manually wipe cache on version bump or toolchain update -export INSTALL_DIR=${INSTALL_PREFIX}/boost -if [ ! -d "${INSTALL_DIR}" ]; then - rm -fr boost_${BOOST_VERSION}.tar.bz2 - wget https://boostorg.jfrog.io/artifactory/main/release/1.74.0/source/boost_${BOOST_VERSION}.tar.bz2 - tar -xjf boost_${BOOST_VERSION}.tar.bz2 - cd boost_${BOOST_VERSION} - cat > user-config.jam << END -using ${BOOST_TOOLSET} : : ${CXX} : - "${CXXFLAGS}" - "${CXXFLAGS}" ; -END - ./bootstrap.sh --prefix=${INSTALL_DIR} --with-libraries=serialization - ./b2 -d0 --user-config=`pwd`/user-config.jam toolset=${BOOST_TOOLSET} link=static variant=${BOOST_VARIANT} - ./b2 -d0 install -else - echo "Boost already installed ..." -fi diff --git a/bin/build-eigen3-linux.sh b/bin/build-eigen3-linux.sh deleted file mode 100755 index 5f2133111b..0000000000 --- a/bin/build-eigen3-linux.sh +++ /dev/null @@ -1,42 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -# Install packages - -# Environment variables -if [ "$CXX" = "g++" ]; then - export CC=/usr/bin/gcc-$GCC_VERSION - export CXX=/usr/bin/g++-$GCC_VERSION - export EXTRACXXFLAGS="-mno-avx" -else - export CC=/usr/bin/clang-$CLANG_VERSION - export CXX=/usr/bin/clang++-$CLANG_VERSION - export EXTRACXXFLAGS="-mno-avx -stdlib=libc++" -fi - -# Print compiler information -$CC --version -$CXX --version - -# log the CMake version (need 3+) -cmake --version - -# Install Eigen3 unless previous install is cached ... must manually wipe cache on version bump or toolchain update -export INSTALL_DIR=${INSTALL_PREFIX}/eigen3 -if [ ! -d "${INSTALL_DIR}" ]; then - cd ${BUILD_PREFIX} - wget -q https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.bz2 - tar -xjf eigen-3.3.7.tar.bz2 - cd eigen-* - mkdir build - cd build - cmake .. -DCMAKE_CXX_COMPILER=$CXX \ - -DCMAKE_C_COMPILER=$CC \ - -DCMAKE_CXX_FLAGS="${EXTRACXXFLAGS}" \ - -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} - make install -else - echo "Eigen3 already installed ..." -fi diff --git a/bin/build-linux.sh b/bin/build-linux.sh deleted file mode 100755 index a6c55ed951..0000000000 --- a/bin/build-linux.sh +++ /dev/null @@ -1,147 +0,0 @@ -#! /bin/sh - -# get the most recent cmake available -if [ ! -d "${INSTALL_PREFIX}/cmake" ]; then - CMAKE_VERSION=3.17.0 - CMAKE_URL="https://cmake.org/files/v${CMAKE_VERSION%.[0-9]}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz" - mkdir ${INSTALL_PREFIX}/cmake && wget --no-check-certificate -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C ${INSTALL_PREFIX}/cmake -fi -export PATH=${INSTALL_PREFIX}/cmake/bin:${PATH} -cmake --version - -export PYTHON_EXECUTABLE=$(which python3) -export TA_PYTHON=ON - -${TRAVIS_BUILD_DIR}/bin/build-mpich-linux.sh -${TRAVIS_BUILD_DIR}/bin/build-scalapack-mpich-linux.sh -${TRAVIS_BUILD_DIR}/bin/build-madness-linux.sh -${TRAVIS_BUILD_DIR}/bin/build-boost-linux.sh -${TRAVIS_BUILD_DIR}/bin/build-eigen3-linux.sh - -# Exit on error -set -ev - -# download latest Doxygen -if [ "$DEPLOY" = "1" ]; then - DOXYGEN_VERSION=1.8.20 - if [ ! -d ${INSTALL_PREFIX}/doxygen-${DOXYGEN_VERSION} ]; then - cd ${BUILD_PREFIX} && wget https://downloads.sourceforge.net/project/doxygen/rel-${DOXYGEN_VERSION}/doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz - cd ${INSTALL_PREFIX} && tar xzf ${BUILD_PREFIX}/doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz - fi - export PATH=${INSTALL_PREFIX}/doxygen-${DOXYGEN_VERSION}/bin:$PATH - which doxygen - doxygen --version -fi - -# Environment variables -if [ "$CXX" = "g++" ]; then - export CC=/usr/bin/gcc-$GCC_VERSION - export CXX=/usr/bin/g++-$GCC_VERSION - export EXTRACXXFLAGS="-mno-avx" - # if linking statically will need fortran libs to detect liblapacke.a in BTAS - export F77=gfortran-$GCC_VERSION -else - export CC=/usr/bin/clang-$CLANG_VERSION - export CXX=/usr/bin/clang++-$CLANG_VERSION - export EXTRACXXFLAGS="-mno-avx -stdlib=libc++" - # if linking statically will need fortran libs to detect liblapacke.a in BTAS - export F77=gfortran-$GCC_VERSION -fi - -export MPI_HOME=${INSTALL_PREFIX}/mpich -export MPICC=$MPI_HOME/bin/mpicc -export MPICXX=$MPI_HOME/bin/mpicxx -export LD_LIBRARY_PATH=/usr/lib/lapack:/usr/lib/libblas:${INSTALL_PREFIX}/scalapack/lib:$LD_LIBRARY_PATH - -# list the prebuilt prereqs -ls -l ${INSTALL_PREFIX} - -# where to install TA (need for testing installed code) -export INSTALL_DIR=${INSTALL_PREFIX}/TA - -# make build dir -cd ${BUILD_PREFIX} -mkdir -p TA -cd TA - -# if have old installed copy of TA, make sure that BTAS tag matches the required tag, if not, remove INSTALL_DIR (will cause rebuild of TA) -if [ -f "${INSTALL_DIR}/include/btas/version.h" ]; then - export INSTALLED_BTAS_TAG=`grep 'define BTAS_REVISION' ${INSTALL_DIR}/include/btas/version.h | awk '{print $3}' | sed s/\"//g` - echo "installed BTAS revision = ${INSTALLED_BTAS_TAG}" - # extract the tracked tag of BTAS - export BTAS_TAG=`grep 'set(TA_TRACKED_BTAS_TAG ' ${TRAVIS_BUILD_DIR}/external/versions.cmake | awk '{print $2}' | sed s/\)//g` - echo "required BTAS revision = ${BTAS_TAG}" - if [ "${BTAS_TAG}" != "${INSTALLED_BTAS_TAG}" ]; then - rm -rf "${INSTALL_DIR}" - fi -fi - -# MADNESS are build separately if $BUILD_TYPE=Debug, otherwise built as part of TA -if [ "$BUILD_TYPE" = "Debug" ]; then - - if [ "$COMPUTE_COVERAGE" = "1" ]; then - export CODECOVCXXFLAGS="-O0 --coverage" - fi - - cmake ${TRAVIS_BUILD_DIR} \ - -DCMAKE_TOOLCHAIN_FILE=cmake/vg/toolchains/travis.cmake \ - -DCMAKE_CXX_COMPILER=$CXX \ - -DCMAKE_C_COMPILER=$CC \ - -DCMAKE_Fortran_COMPILER=$F77 \ - -DMPI_CXX_COMPILER=$MPICXX \ - -DMPI_C_COMPILER=$MPICC \ - -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ - -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DCMAKE_CXX_FLAGS="-ftemplate-depth=1024 -Wno-unused-command-line-argument ${EXTRACXXFLAGS} ${CODECOVCXXFLAGS}" \ - -DCMAKE_PREFIX_PATH="${INSTALL_PREFIX}/madness;${INSTALL_PREFIX}/eigen3;${INSTALL_PREFIX}/boost" \ - -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \ - -DTA_PYTHON="${TA_PYTHON}" \ - -DENABLE_SCALAPACK=ON - -else - - # if have old installed copy of TA, make sure that MADNESS tag matches the required tag, if not, remove INSTALL_DIR (will cause rebuild of MADNESS) - if [ -f "${INSTALL_DIR}/include/madness/config.h" ]; then - export INSTALLED_MADNESS_TAG=`grep 'define MADNESS_REVISION' ${INSTALL_DIR}/include/madness/config.h | awk '{print $3}' | sed s/\"//g` - echo "installed MADNESS revision = ${INSTALLED_MADNESS_TAG}" - # extract the tracked tag of MADNESS - export MADNESS_TAG=`grep 'set(TA_TRACKED_MADNESS_TAG ' ${TRAVIS_BUILD_DIR}/external/versions.cmake | awk '{print $2}' | sed s/\)//g` - echo "required MADNESS revision = ${MADNESS_TAG}" - if [ "${MADNESS_TAG}" != "${INSTALLED_MADNESS_TAG}" ]; then - rm -rf "${INSTALL_DIR}" - fi - fi - - cmake ${TRAVIS_BUILD_DIR} \ - -DCMAKE_TOOLCHAIN_FILE=cmake/vg/toolchains/travis.cmake \ - -DCMAKE_CXX_COMPILER=$CXX \ - -DCMAKE_C_COMPILER=$CC \ - -DCMAKE_Fortran_COMPILER=$F77 \ - -DMPI_CXX_COMPILER=$MPICXX \ - -DMPI_C_COMPILER=$MPICC \ - -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ - -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DCMAKE_CXX_FLAGS="-ftemplate-depth=1024 -Wno-unused-command-line-argument ${EXTRACXXFLAGS}" \ - -DCMAKE_PREFIX_PATH="${INSTALL_PREFIX}/eigen3;${INSTALL_PREFIX}/boost" \ - -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \ - -DTA_PYTHON="${TA_PYTHON}" \ - -DTA_ASSERT_POLICY=TA_ASSERT_THROW \ - -DENABLE_SCALAPACK=ON - -fi - -# Build all libraries, examples, and applications -make -j2 all VERBOSE=1 -make install -# remove install dir to avoid broken artifacts like BTAS polluting the next build via cached copy -rm -rf $INSTALL_DIR - -# Validate -make -j1 ta_test VERBOSE=1 -export MAD_NUM_THREADS=2 -# to find dep shared libs (do we need this since El is gone?) -export LD_LIBRARY_PATH=${INSTALL_PREFIX}/TA/lib:${INSTALL_PREFIX}/madness/lib:${LD_LIBRARY_PATH} -make check-tiledarray - -# Build examples -make -j2 examples VERBOSE=1 diff --git a/bin/build-madness-linux.sh b/bin/build-madness-linux.sh deleted file mode 100755 index d255bff92d..0000000000 --- a/bin/build-madness-linux.sh +++ /dev/null @@ -1,85 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -# Will build MADNESS stand-alone for Debug builds only -if [ "$BUILD_TYPE" = "Debug" ]; then - - # Environment variables - if [ "$CXX" = "g++" ]; then - export CC=/usr/bin/gcc-$GCC_VERSION - export CXX=/usr/bin/g++-$GCC_VERSION - export EXTRACXXFLAGS="-mno-avx" - export F77=gfortran-$GCC_VERSION - else - export CC=/usr/bin/clang-$CLANG_VERSION - export CXX=/usr/bin/clang++-$CLANG_VERSION - export EXTRACXXFLAGS="-mno-avx -stdlib=libc++" - export F77=gfortran-$GCC_VERSION - fi - - export MPI_HOME=${INSTALL_PREFIX}/mpich - export MPICC=$MPI_HOME/bin/mpicc - export MPICXX=$MPI_HOME/bin/mpicxx - export LD_LIBRARY_PATH=/usr/lib/lapack:/usr/lib/libblas:$LD_LIBRARY_PATH - - # list the prebuilt prereqs - ls -l ${INSTALL_PREFIX} - - # where to install MADNESS (need for testing installed code) - export INSTALL_DIR=${INSTALL_PREFIX}/madness - - # extract the tracked tag of MADNESS - export MADNESS_TAG=`grep 'set(TA_TRACKED_MADNESS_TAG ' ${TRAVIS_BUILD_DIR}/external/versions.cmake | awk '{print $2}' | sed s/\)//g` - echo "required MADNESS revision = ${MADNESS_TAG}" - - # make sure installed MADNESS tag matches the required tag, if not, remove INSTALL_DIR (will cause reinstall) - if [ -f "${INSTALL_DIR}/include/madness/config.h" ]; then - export INSTALLED_MADNESS_TAG=`grep 'define MADNESS_REVISION' ${INSTALL_DIR}/include/madness/config.h | awk '{print $3}' | sed s/\"//g` - echo "installed MADNESS revision = ${INSTALLED_MADNESS_TAG}" - if [ "${MADNESS_TAG}" != "${INSTALLED_MADNESS_TAG}" ]; then - rm -rf "${INSTALL_DIR}" - fi - fi - - if [ ! -d "${INSTALL_DIR}" ]; then - - # make build dir - cd ${BUILD_PREFIX} - mkdir -p madness - cd madness - - if [ -n "${MADNESS_OVER_PARSEC}" ]; then - MADNESS_BACKEND_OPTION="-DMADNESS_TASK_BACKEND=PaRSEC" - fi - - # check out the tracked tag of MADNESS - git clone https://github.com/TESSEorg/madness.git madness_src && cd madness_src && git checkout ${MADNESS_TAG} && cd .. - - cmake madness_src \ - -DCMAKE_TOOLCHAIN_FILE="${TRAVIS_BUILD_DIR}/cmake/toolchains/travis.cmake" \ - -DCMAKE_CXX_COMPILER=$CXX \ - -DCMAKE_C_COMPILER=$CC \ - -DMPI_CXX_COMPILER=$MPICXX \ - -DMPI_C_COMPILER=$MPICC \ - -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ - -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DCMAKE_CXX_FLAGS="-ftemplate-depth=1024 -Wno-unused-command-line-argument ${EXTRACXXFLAGS}" \ - -DMADNESS_BUILD_MADWORLD_ONLY=ON \ - -DENABLE_MPI=ON \ - -DMPI_THREAD=multiple \ - -DENABLE_TBB=OFF \ - -DTBB_ROOT_DIR=/usr \ - -DFORTRAN_INTEGER_SIZE=4 \ - -DENABLE_LIBXC=OFF \ - -DENABLE_GPERFTOOLS=OFF \ - -DASSERTION_TYPE=throw \ - -DDISABLE_WORLD_GET_DEFAULT=ON \ - ${MADNESS_BACKEND_OPTION} - - # Build+install MADworld interface - make -j2 install VERBOSE=1 - fi - -fi diff --git a/bin/build-mpich-linux.sh b/bin/build-mpich-linux.sh deleted file mode 100755 index 7e38ef3167..0000000000 --- a/bin/build-mpich-linux.sh +++ /dev/null @@ -1,42 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -# Install packages - -# always use gcc to compile MPICH, there are unexplained issues with clang (e.g. MPI_Barrier aborts) -export CC=/usr/bin/gcc-$GCC_VERSION -export CXX=/usr/bin/g++-$GCC_VERSION -export FC=/usr/bin/gfortran-$GCC_VERSION - -# Print compiler information -$CC --version -$CXX --version -$FC --version - -# log the CMake version (need 3+) -cmake --version - -# Install MPICH unless previous install is cached ... must manually wipe cache on version bump or toolchain update -export INSTALL_DIR=${INSTALL_PREFIX}/mpich -if [ ! -d "${INSTALL_DIR}" ]; then - cd ${BUILD_PREFIX} - export MPICH_VERSION=3.3 - wget --no-check-certificate -q http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz - tar -xzf mpich-${MPICH_VERSION}.tar.gz - cd mpich-${MPICH_VERSION} - ./configure FC=$FC CC=$CC CXX=$CXX --prefix=${INSTALL_DIR} - make -j2 - make install - ${INSTALL_DIR}/bin/mpichversion - ${INSTALL_DIR}/bin/mpicc -show - ${INSTALL_DIR}/bin/mpicxx -show - ${INSTALL_DIR}/bin/mpifort -show -else - echo "MPICH installed..." - find ${INSTALL_DIR} -name mpiexec - find ${INSTALL_DIR} -name mpicc - find ${INSTALL_DIR} -name mpicxx - find ${INSTALL_DIR} -name mpifort -fi diff --git a/bin/build-scalapack-mpich-linux.sh b/bin/build-scalapack-mpich-linux.sh deleted file mode 100755 index 213d7bc5a7..0000000000 --- a/bin/build-scalapack-mpich-linux.sh +++ /dev/null @@ -1,48 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -# always use gcc, just like mpich ... ? -export CC=/usr/bin/gcc-$GCC_VERSION -export CXX=/usr/bin/g++-$GCC_VERSION -export FC=/usr/bin/gfortran-$GCC_VERSION - -# Print compiler information -$CC --version -$CXX --version -$FC --version - -# log the CMake version (need 3+) -cmake --version - -# Install MPICH unless previous install is cached ... must manually wipe cache on version bump or toolchain update -export INSTALL_DIR=${INSTALL_PREFIX}/scalapack -if [ ! -d "${INSTALL_DIR}" ]; then - - # Make sure MPI is built - ${INSTALL_PREFIX}/mpich/bin/mpichversion - ${INSTALL_PREFIX}/mpich/bin/mpicc -show - ${INSTALL_PREFIX}/mpich/bin/mpicxx -show - ${INSTALL_PREFIX}/mpich/bin/mpif90 -show - - cd ${BUILD_PREFIX} - git clone https://github.com/Reference-ScaLAPACK/scalapack.git - cd scalapack - git checkout 0efeeb6d2ec9faf0f2fd6108de5eda60773cdcf9 # checked revision - cmake -H. -Bbuild_scalapack \ - -DCMAKE_C_COMPILER=$CC \ - -DCMAKE_Fortran_COMPILER=$FC \ - -DMPI_C_COMPILER=${INSTALL_PREFIX}/mpich/bin/mpicc \ - -DMPI_Fortran_COMPILER=${INSTALL_PREFIX}/mpich/bin/mpif90 \ - -DCMAKE_TOOLCHAIN_FILE="${TRAVIS_BUILD_DIR}/cmake/toolchains/travis.cmake" \ - -DCMAKE_PREFIX_PATH=${INSTALL_DIR} \ - -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} - - cmake --build build_scalapack -j2 - cmake --build build_scalapack --target install - find ${INSTALL_DIR} -name libscalapack.so -else - echo "ScaLAPACK installed..." - find ${INSTALL_DIR} -name libscalapack.so -fi diff --git a/bin/deploy-linux.sh b/bin/deploy-linux.sh deleted file mode 100755 index 279a8f69e8..0000000000 --- a/bin/deploy-linux.sh +++ /dev/null @@ -1,62 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -git config --global user.email "travis@travis-ci.org" -git config --global user.name "Travis CI" - -# only non-cron job deploys -RUN=1 -if [ "$TRAVIS_EVENT_TYPE" = "cron" ] || [ "$TRAVIS_BRANCH" != "master" ]; then - RUN=0 -fi -if [ "$RUN" = "0" ]; then - echo "Deployment skipped" - exit 0 -fi - -# deploy from the build area -cd ${BUILD_PREFIX}/TA - -### deploy docs -# see https://gist.github.com/willprice/e07efd73fb7f13f917ea - -# build docs -export VERBOSE=1 -cmake --build . --target html -if [ ! -f "${BUILD_PREFIX}/TA/doc/dox/html/index.html" ]; then - echo "Target html built successfully but did not produce index.html" - exit 1 -fi - -# check out current docs + template -git clone --depth=1 https://github.com/ValeevGroup/tiledarray.git --branch gh-pages --single-branch tiledarray-docs-current -git clone --depth=1 https://github.com/ValeevGroup/tiledarray.git --branch gh-pages-template --single-branch tiledarray-docs-template -mkdir tiledarray-docs -cp -rp tiledarray-docs-current/* tiledarray-docs -rm -rf tiledarray-docs-current -cp -rp tiledarray-docs-template/* tiledarray-docs -rm -rf tiledarray-docs-template -cd tiledarray-docs -# copy TA's README.md into index.md -cp ${TRAVIS_BUILD_DIR}/README.md index.md -# update dox -if [ -d dox-master ]; then - rm -rf dox-master -fi -mv ${BUILD_PREFIX}/TA/doc/dox/html dox-master -# Jekyll does not allow files with "special" names, e.g. whose names start with underscore -# must "include" such files explicitly -# re: how file names must be formatted: see https://github.com/jekyll/jekyll/issues/1352 -echo "include:" >> _config.yml -find dox-master -name "_*" | sed "s/dox-master\// \- /g" >> _config.yml -# make empty repo to ensure gh-pages contains no history -git init -git add * -git commit -a -q -m "rebuilt TA master docs via Travis build: $TRAVIS_BUILD_NUMBER" -git checkout -b gh-pages -git remote add origin https://${GH_TILEDARRAY_TOKEN}@github.com/ValeevGroup/tiledarray.git > /dev/null 2>&1 -git push origin +gh-pages --force -cd .. -rm -rf tiledarray-docs diff --git a/bin/docker-cuda.md b/bin/docker-cuda.md index a525369070..0f39c0ac20 100644 --- a/bin/docker-cuda.md +++ b/bin/docker-cuda.md @@ -1,5 +1,5 @@ # Intro -These notes describe how to build TiledArray with CUDA support enabled within the latest nvidia/cuda Docker image (https://hub.docker.com/r/nvidia/cuda/). This is useful for experimentation and/or provisioning computational results (e.g. for creating supplementary info for a journal article). If you want to use Docker to run/debug Travis-CI jobs, see [docker-travis.md](docker-travis.md) +These notes describe how to build TiledArray with CUDA support enabled within the latest nvidia/cuda Docker image (https://hub.docker.com/r/nvidia/cuda/). This is useful for experimentation and/or provisioning computational results (e.g. for creating supplementary info for a journal article). # Using These notes assume that Docker 19.03 and NVIDIA Container Toolkit (https://github.com/NVIDIA/nvidia-docker) are installed on your machine and that you start at the top of the TiledArray source tree. diff --git a/bin/docker-travis-build.sh b/bin/docker-travis-build.sh deleted file mode 100755 index 4209bad9ef..0000000000 --- a/bin/docker-travis-build.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/bash - -# this script builds a 'Bionic' env docker image used by Travis-CI for TiledArray project -# -# to run bash in the image: docker run -it tiledarray-travis-debug bash -l -# see docker-travis.md for further instructions -# N.B. relevant locations: -# - source dir: /home/travis/build/ValeevGroup/tiledarray (TRAVIS_BUILD_DIR env in Travis jobs) -# - build dir: /home/travis/_build -# - install dir: /home/travis/_install - -# this is where in the container file system Travis-CI "starts" -export TRAVIS_BUILD_TOPDIR=/home/travis/build -export DIRNAME=`dirname $0` -export ABSDIRNAME=`pwd $DIRNAME` - -############################################################## -# make a script to download all prereqs and clone TiledArray repo -setup=setup.sh -cat > $setup << END -#!/bin/sh -curl -sSL "http://apt.llvm.org/llvm-snapshot.gpg.key" | apt-key add - -echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal main" | tee -a /etc/apt/sources.list > /dev/null -apt-add-repository -y "ppa:ubuntu-toolchain-r/test" -apt-get -yq update >> ~/apt-get-update.log -apt-get -yq --no-install-suggests --no-install-recommends --force-yes install g++-7 g++-8 g++-9 gfortran-7 gfortran-8 gfortran-9 libblas-dev liblapack-dev liblapacke-dev libtbb-dev clang-8 clang-9 cmake cmake-data libclang1-9 graphviz fonts-liberation \ -python3 python3-pip python3-pytest python3-numpy -mkdir -p ${TRAVIS_BUILD_TOPDIR} -cd ${TRAVIS_BUILD_TOPDIR} -git clone https://github.com/ValeevGroup/tiledarray.git ${TRAVIS_BUILD_TOPDIR}/ValeevGroup/tiledarray -END -chmod +x $setup - -############################################################## -# make a script to build all extra prereqs once in the container -build=build.sh -cat > $build << END -#!/bin/sh -cd /home/travis/_build -export BUILD_PREFIX=/home/travis/_build -export INSTALL_PREFIX=/home/travis/_install -export TRAVIS_BUILD_DIR=${TRAVIS_BUILD_TOPDIR}/ValeevGroup/tiledarray -export TRAVIS_EVENT_TYPE=cron -export TRAVIS_OS_NAME=linux -\${TRAVIS_BUILD_DIR}/bin/build-\$TRAVIS_OS_NAME.sh -END -chmod +x $build - -############################################################## -# make Dockerfile -cat > Dockerfile << END -# Travis default 'Focal' image -FROM travisci/ci-ubuntu-2004:packer-1609444725-e5de6974 - -# Use baseimage-docker's init system. -CMD ["/sbin/my_init"] - -# create source, build, and install dirs -RUN mkdir -p /home/travis/_build -RUN mkdir -p /home/travis/_install - -# install prereqs -ADD $setup /home/travis/_build/$setup -RUN /home/travis/_build/$setup - -# Clean up APT when done. -RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -# copy travis scripts -ADD $build /home/travis/_build/$build - -# for further info ... -RUN echo "\e[92mDone! For info on how to use the image refer to $ABSDIRNAME/docker-travis.md\e[0m" - -END - -function clean_up { - rm -f $setup $build Dockerfile - exit -} - -trap clean_up SIGHUP SIGINT SIGTERM - -############################################################## -# build a dev image -docker build -t tiledarray-travis-debug . - -############################################################## -# extra admin tasks, uncomment as needed - -############################################################## -# done -clean_up diff --git a/bin/docker-travis.md b/bin/docker-travis.md deleted file mode 100644 index 65e43632df..0000000000 --- a/bin/docker-travis.md +++ /dev/null @@ -1,33 +0,0 @@ -# Intro -These notes describe how to build TiledArray within the latest Travis-CI Docker image. This is useful for debugging Travis-CI jobs on your local machine. -# Using -These notes assume that Docker is installed on your machine and that you start at the top of the TiledArray source tree. - -## Create/build Docker Travis image -1. Create a Travis-CI docker image: `cd bin; ./docker-travis-build.sh` -2. Run a container using the newly created image: `docker run -it tiledarray-travis-debug bash -l` -3. `cd /home/travis/_build` -4. Configure the job to use the appropriate compiler, compiler version, and debug/release build type: - * `export BUILD_TYPE=B`, where `B` is `Debug` or `Release`. - * If want to use GNU C++ compiler (gcc): - * `export GCC_VERSION=VVV` where `VVV` should be the GCC version to be used. The currently valid values are `7`, `8` and `9`. - * `export CXX=g++` - * If want to use Clang C++ compiler (clang++): - * `export GCC_VERSION=8` - * `export CLANG_VERSION=VVV` where `VVV` should be the Clang version to be used. The currently valid values is `11`. - * `export CXX=clang++` - * `apt-get update && apt-get install libc++-${CLANG_VERSION}-dev libc++abi-${CLANG_VERSION}-dev` -5. Build prerequisites (MPICH, MADNESS, ScaLAPACK), TiledArray, and run tests: `./build.sh` - -## Notes -* According to [Travis-CI docs](https://docs.travis-ci.com/user/reference/overview/) you want to configure your Docker to run containers with 2 cores and 7.5 GB of RAM to best match the production environment. -* If you plan to use this container multiple times it might make sense to take a snapshot at this point to avoid having to recompile the prerequisites each and every time. Store it as a separate image, e.g. `docker commit container_id tiledarray-travis-debug:clang-debug`, where `container_id` can be found in the output of `docker ps`. Next time to start debugging you will need to pull updates to the TiledArray source (do `cd /home/travis/build/ValeevGroup/tiledarray && git pull`), then execute step 2 with the new image name, execute step 3, and go directly to step 6. -* To install `gdb` execute `apt-get update && apt-get install gdb`. Also, it appears that to be able to attach `gdb` or any other debugger to a running process you must run the Docker container in privileged mode as `docker run --privileged -it tiledarray-travis-debug:clang-debug bash -l`. -* To debug parallel jobs you want to launch jobs in a gdb in an xterm. To run xterm you need to ssh into the container. To start an ssh server in the container do this: - * Connect sshd's port of the container (22) to an unprivileged port (say, 2222) of the host: `docker run -p 127.0.0.1:2222:22 --privileged -it tiledarray-travis-debug:clang-debug bash -l` - * Generate host keys: `ssh-keygen -A` - * Create a root password: `passwd` and follow prompts. No need to be fancy: security is not a concern here, but `passwd` will not accept an empty password. N.B. This is easier than setting up a pubkey login, so don't bother with that. - * Edit `/etc/ssh/sshd_config` and allow root to log in by ensuring that `PermitRootLogin` and `PasswordAuthentication` are set to `yes`. - * Start ssh server: `/etc/init.d/ssh start` - * (optional) To launch gdb in xterm windows: `apt-get update && apt-get install xterm` - * You should be able to log in from an xterm on the host side: `ssh -Y -p 2222 root@localhost` diff --git a/bin/docker.md b/bin/docker.md index fb558db6db..1826c95ef2 100644 --- a/bin/docker.md +++ b/bin/docker.md @@ -1,5 +1,5 @@ # Intro -These notes describe how to build TiledArray within the latest phusion (https://github.com/phusion/baseimage-docker) Docker image. This is useful for experimentation and/or provisioning computational results (e.g. for creating supplementary info for a journal article). If you want to use Docker to run/debug Travis-CI jobs, see [docker-travis.md](docker-travis.md) +These notes describe how to build TiledArray within the latest phusion (https://github.com/phusion/baseimage-docker) Docker image. This is useful for experimentation and/or provisioning computational results (e.g. for creating supplementary info for a journal article). # Using These notes assume that Docker is installed on your machine and that you start at the top of the TiledArray source tree. diff --git a/doc/dox/contrib/Travis-CI-Administration-Notes.md b/doc/dox/contrib/Travis-CI-Administration-Notes.md index 0b626507cd..0284ebf0b9 100644 --- a/doc/dox/contrib/Travis-CI-Administration-Notes.md +++ b/doc/dox/contrib/Travis-CI-Administration-Notes.md @@ -1,13 +1,5 @@ -# Managing Travis Builds {#Travis-CI-Administration-Notes} +# Managing CI Builds {#CI-Administration-Notes} ## Basic Facts -* Travis CI configuration is in file `.travis.yml`, and build scripts are in `bin/build-*linux.sh`. Only Linux builds are currently supported. -* `BUILD_TYPE=Debug` jobs build and install MADNESS separately, before building TiledArray' `BUILD_TYPE=Release` jobs build MADNESS as a step of the TiledArray build. -* MPICH and (`BUILD_TYPE=Debug` only) MADNESS installation directories are _cached_. **Build scripts only verify the presence of installed directories, and do not update them if their configuration (e.g. static vs. shared, or code version) has changed. _Thus it is admin's responsibility to manually wipe out the cache on a per-branch basis_.** It is the easiest to do via the Travis-CI web interface (click on 'More Options' menu at the top right, select 'Caches', etc.). -* Rebuilding cache of prerequisites may take more time than the job limit (50 mins at the moment), so rebuilding cache can take several attempts. Since Travis-CI does not support forced cache updates (see e.g. https://github.com/travis-ci/travis-ci/issues/6410) if the job looks like it's going to time out we report success to Travis just so that it will store cache. __Thus jobs that timed out will be falsely reported as successful (rather than errored)!__ When rebuilding cache it may be necessary to manually restart some build jobs to make sure that cache rebuild is complete (or, just to be sure, restart the whole __build__ one time just to be sure all caches have been rebuilt). Again: this is only relevant when rebuilding caches (i.e. <5% of the time), otherwise there should be no need to restart jobs manually. - -# Debugging Travis-CI jobs - -## Local debugging - -Follow the instructions contained in [docker-travis.md](https://github.com/ValeevGroup/tiledarray/blob/master/bin/docker-travis.md) . +* TiledArray only uses GitLab CI at this point +* CI configuration is in file `.gitlab-ci.yml`, and build metadata is in `ci/`. Only Linux builds are currently supported. From 754a88069195236cd6b425a144dc918fd2b3d1c8 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 13 Jul 2022 15:25:22 -0400 Subject: [PATCH 15/30] bump BTAS tag to pull in https://github.com/ValeevGroup/BTAS/pull/136 --- INSTALL.md | 2 +- external/versions.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 579f92f28d..f999a98747 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -38,7 +38,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - Boost.Container: header-only - Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing* - Boost.Range: header-only, *only used for unit testing* -- [BTAS](http://github.com/ValeevGroup/BTAS), tag 242871710dabd5ef337e5253000d3e38c1d977ba . If usable BTAS installation is not found, TiledArray will download and compile +- [BTAS](http://github.com/ValeevGroup/BTAS), tag da2cb0ea3f10b0a88b1532e708c7358ca92bde6a . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. - [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 997e8b458c4234fb6c8c2781a5df59cb14b7e700 . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. diff --git a/external/versions.cmake b/external/versions.cmake index c1120147d9..8419cba40f 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -24,8 +24,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG fae8081179b9d074968b08e064a32e3ca07ab0f1) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 242871710dabd5ef337e5253000d3e38c1d977ba) -set(TA_TRACKED_BTAS_PREVIOUS_TAG db884b020b5c13c312c07df9d5c03cea2d65afb2) +set(TA_TRACKED_BTAS_TAG da2cb0ea3f10b0a88b1532e708c7358ca92bde6a) +set(TA_TRACKED_BTAS_PREVIOUS_TAG 242871710dabd5ef337e5253000d3e38c1d977ba) set(TA_TRACKED_LIBRETT_TAG 68abe31a9ec6fd2fd9ffbcd874daa80457f947da) set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 7e27ac766a9038df6aa05613784a54a036c4b796) From 8d9b49a82be9a11ef497b6d27babd467d1ba4b0b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 30 Sep 2022 16:05:30 -0400 Subject: [PATCH 16/30] introduced TA_SCOPED_INITIALIZE and details (scoped finalizer, etc.) --- examples/cuda/ta_cc_abcd_cuda.cpp | 5 +---- examples/cuda/ta_dense_cuda.cpp | 4 +--- examples/cuda/ta_reduce_cuda.cpp | 4 +--- examples/cuda/ta_vector_cuda.cpp | 4 +--- src/TiledArray/initialize.h | 24 ++++++++++++++++++++++++ src/TiledArray/tiledarray.cpp | 12 ++++++++++++ 6 files changed, 40 insertions(+), 13 deletions(-) diff --git a/examples/cuda/ta_cc_abcd_cuda.cpp b/examples/cuda/ta_cc_abcd_cuda.cpp index c67895f7dc..6a2ef26e5f 100644 --- a/examples/cuda/ta_cc_abcd_cuda.cpp +++ b/examples/cuda/ta_cc_abcd_cuda.cpp @@ -60,7 +60,7 @@ int main(int argc, char** argv) { try { // Initialize runtime - TA::World& world = TA::initialize(argc, argv); + TA::World& world = TA_SCOPED_INITIALIZE(argc, argv); // Get command line arguments if (argc < 5) { @@ -136,9 +136,6 @@ int main(int argc, char** argv) { } else { cc_abcd(world, trange_occ, trange_uocc, repeat); } - - TA::finalize(); - } catch (TA::Exception& e) { std::cerr << "!! TiledArray exception: " << e.what() << "\n"; rc = 1; diff --git a/examples/cuda/ta_dense_cuda.cpp b/examples/cuda/ta_dense_cuda.cpp index 51ebc67b11..14f692329b 100644 --- a/examples/cuda/ta_dense_cuda.cpp +++ b/examples/cuda/ta_dense_cuda.cpp @@ -300,7 +300,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, int try_main(int argc, char **argv) { // Initialize runtime - TiledArray::World &world = TiledArray::initialize(argc, argv); + TiledArray::World &world = TA_SCOPED_INITIALIZE(argc, argv); // Get command line arguments if (argc < 6) { @@ -453,8 +453,6 @@ int try_main(int argc, char **argv) { throw std::runtime_error("Invalid storage type!\n"); } - TiledArray::finalize(); - return 0; } diff --git a/examples/cuda/ta_reduce_cuda.cpp b/examples/cuda/ta_reduce_cuda.cpp index 417fa2d72f..c275863519 100644 --- a/examples/cuda/ta_reduce_cuda.cpp +++ b/examples/cuda/ta_reduce_cuda.cpp @@ -239,7 +239,7 @@ using cudaTile = TiledArray::Tile>; int try_main(int argc, char **argv) { // Initialize runtime - TiledArray::World &world = TiledArray::initialize(argc, argv); + TiledArray::World &world = TA_SCOPED_INITIALIZE(argc, argv); // Get command line arguments if (argc < 4) { @@ -365,8 +365,6 @@ int try_main(int argc, char **argv) { do_main_body>(world, Nm, Bm, Nn, Bn, nrepeat); } - TiledArray::finalize(); - return 0; } diff --git a/examples/cuda/ta_vector_cuda.cpp b/examples/cuda/ta_vector_cuda.cpp index f3c6265eb1..f5d2772ced 100644 --- a/examples/cuda/ta_vector_cuda.cpp +++ b/examples/cuda/ta_vector_cuda.cpp @@ -258,7 +258,7 @@ using cudaTile = TiledArray::Tile>; int try_main(int argc, char **argv) { // Initialize runtime - TiledArray::World &world = TiledArray::initialize(argc, argv); + auto &world = TA_SCOPED_INITIALIZE(argc, argv); // Get command line arguments if (argc < 4) { @@ -384,8 +384,6 @@ int try_main(int argc, char **argv) { do_main_body>(world, Nm, Bm, Nn, Bn, nrepeat); } - TiledArray::finalize(); - return 0; } diff --git a/src/TiledArray/initialize.h b/src/TiledArray/initialize.h index c86fa1d151..324f772ccf 100644 --- a/src/TiledArray/initialize.h +++ b/src/TiledArray/initialize.h @@ -60,10 +60,34 @@ inline World& initialize(int& argc, char**& argv, const MPI_Comm& comm, /// @} +#ifndef TA_SCOPED_INITIALIZE +/// calling this will initialize TA and then finalize it when leaving this scope +#define TA_SCOPED_INITIALIZE(args...) \ + TiledArray::initialize(args); \ + auto finalizer = TiledArray::scoped_finalizer(); +#endif + /// Finalizes TiledArray (and MADWorld runtime, if it had not been initialized /// when TiledArray::initialize was called). void finalize(); +namespace detail { +struct Finalizer { + ~Finalizer() noexcept; +}; +} // namespace detail + +/// creates an object whose destruction upon leaving this scope will cause +/// TiledArray::finalize to be called +detail::Finalizer scoped_finalizer(); + +#ifndef TA_FINALIZE_AFTER_LEAVING_THIS_SCOPE +/// calling this will cause TiledArray::finalize() to be called (if needed) +/// upon leaving this scope +#define TA_FINALIZE_AFTER_LEAVING_THIS_SCOPE() \ + auto finalizer = TiledArray::scoped_finalizer(); +#endif + void taskq_wait_busy(); void taskq_wait_yield(); void taskq_wait_usleep(int); diff --git a/src/TiledArray/tiledarray.cpp b/src/TiledArray/tiledarray.cpp index 3840fe750a..b4700ddec9 100644 --- a/src/TiledArray/tiledarray.cpp +++ b/src/TiledArray/tiledarray.cpp @@ -39,6 +39,8 @@ inline void cuda_finalize() { librettFinalize(); cublasDestroy(cuBLASHandlePool::handle()); delete &cuBLASHandlePool::handle(); + // although TA::cudaEnv is a singleton, must explicitly delete it so + // that CUDA runtime is not finalized before the cudaEnv dtor is called cudaEnv::instance().reset(nullptr); } #endif @@ -173,6 +175,16 @@ void TiledArray::finalize() { finalized_accessor() = true; } +TiledArray::detail::Finalizer::~Finalizer() noexcept { + static std::mutex mtx; + std::scoped_lock lock(mtx); + if (TiledArray::initialized()) { + TiledArray::finalize(); + } +} + +TiledArray::detail::Finalizer TiledArray::scoped_finalizer() { return {}; } + void TiledArray::ta_abort() { SafeMPI::COMM_WORLD.Abort(); } void TiledArray::ta_abort(const std::string& m) { From 2196a11cfe455a3a5478fb7949f3a58024d8ff37 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 30 Sep 2022 16:06:28 -0400 Subject: [PATCH 17/30] fixed ta_vector_cuda for asymmetric matrices --- examples/cuda/ta_vector_cuda.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/cuda/ta_vector_cuda.cpp b/examples/cuda/ta_vector_cuda.cpp index f5d2772ced..1593a68e8b 100644 --- a/examples/cuda/ta_vector_cuda.cpp +++ b/examples/cuda/ta_vector_cuda.cpp @@ -62,8 +62,9 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, blocking.push_back( TiledArray::TiledRange1(blocking_n.begin(), blocking_n.end())); - TiledArray::TiledRange // TRange - trange(blocking.begin(), blocking.end()); + TiledArray::TiledRange trange(blocking.begin(), blocking.end()); + TiledArray::TiledRange trange_tr(blocking.rbegin(), + blocking.rend()); // transposed trange using value_type = typename Tile::value_type; using TArray = TA::DistArray; @@ -150,7 +151,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, } TArray a(world, trange); - TArray b(world, trange); + TArray b(world, trange_tr); a.fill(val_a); b.fill(val_b); @@ -222,7 +223,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, } TArray a(world, trange); - TArray b(world, trange); + TArray b(world, trange_tr); a.fill(val_a); b.fill(val_b); From c536b1b5a13feb46fb0c5c2dc07c18ff37705e15 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 30 Sep 2022 16:14:51 -0400 Subject: [PATCH 18/30] use single CUDA stream for unit tests for now --- tests/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9ac9250463..88ea115334 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -154,9 +154,11 @@ if(ENABLE_MPI) $ --log_level=unit_scope ${${executable}_np_${p}_args} ${MPIEXEC_POSTFLAGS} ) + # N.B. some CUDA unit tests require TA_CUDA_NUM_STREAMS=1 for now set_tests_properties(tiledarray/unit/run-np-${p} PROPERTIES FIXTURES_REQUIRED TA_UNIT_TESTS_EXEC - ENVIRONMENT MAD_NUM_THREADS=2) + ENVIRONMENT "MAD_NUM_THREADS=2;TA_CUDA_NUM_STREAMS=1" + ) if (p GREATER 1) set_tests_properties(tiledarray/unit/run-np-${p} PROPERTIES ENVIRONMENT TA_UT_DISTRIBUTED=1) @@ -165,7 +167,9 @@ if(ENABLE_MPI) else() add_test(NAME tiledarray/unit/run-np-1 COMMAND ${executable}) + # N.B. some CUDA unit tests require TA_CUDA_NUM_STREAMS=1 for now set_tests_properties(tiledarray/unit/run-np-1 PROPERTIES FIXTURES_REQUIRED TA_UNIT_TESTS_EXEC - ENVIRONMENT MAD_NUM_THREADS=2) + ENVIRONMENT "MAD_NUM_THREADS=2;TA_CUDA_NUM_STREAMS=1" + ) endif() From 15f54ae42cb54e3350f48d23ac46284b175bac9f Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 1 Oct 2022 10:45:59 -0400 Subject: [PATCH 19/30] [ci] attempt to resolve https://gitlab.com/ValeevGroup/tiledarray/-/jobs/3112494437#L1284 --- ci/.build-project | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/.build-project b/ci/.build-project index a9c9f7582a..44b208242d 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -80,9 +80,14 @@ if [[ "$vars" =~ \"-DBLAS_PREFERENCE_LIST=IntelMKL ]]; then fi if [[ "$vars" =~ \"-D([a-zA-Z]+_)?ENABLE_CUDA=(ON|TRUE|1|YES)\" ]]; then cmd "make -C /home/ValeevGroup install/cuda" + cmd "rm -fr /usr/local/bin/nvcc" cmd "export CUDACXX=/usr/local/cuda/bin/nvcc" cmd "${CUDACXX} -V" + # this will be moved to image builder + cmd "sudo apt-get -yq update" + cmd "sudo apt-get -yq install nvidia-utils-510" cmd "find / -name \"*nvidia-smi\"" + cmd "nvidia-smi" fi section_end preparing_system_section From 8f2a363da06b51c10c516968fb76de88e29a29b0 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 7 Oct 2022 15:43:32 -0400 Subject: [PATCH 20/30] fixed ta_reduce_cuda for asymmetric matrices, resolves #366 --- examples/cuda/ta_reduce_cuda.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/cuda/ta_reduce_cuda.cpp b/examples/cuda/ta_reduce_cuda.cpp index c275863519..e453069892 100644 --- a/examples/cuda/ta_reduce_cuda.cpp +++ b/examples/cuda/ta_reduce_cuda.cpp @@ -62,6 +62,8 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, TiledArray::TiledRange // TRange trange(blocking.begin(), blocking.end()); + TiledArray::TiledRange trange_tr(blocking.rbegin(), + blocking.rend()); // transposed trange using value_type = typename Tile::value_type; using TArray = TA::DistArray; @@ -116,7 +118,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, } TArray a(world, trange); - TArray b(world, trange); + TArray b(world, trange_tr); a.fill(val_a); b.fill(val_b); @@ -198,7 +200,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, } TArray a(world, trange); - TArray b(world, trange); + TArray b(world, trange_tr); a.fill(val_a); b.fill(val_b); From 32fa553424f09ca9729fdd5d67cf943ca88519d5 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 8 Oct 2022 19:56:38 -0400 Subject: [PATCH 21/30] try using valeevgroup/ubuntu:cuda image --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6ab502b527..7df07712c6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -66,7 +66,7 @@ ubuntu: CXX: [ g++, clang++-9 ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] - - IMAGE : [ "ubuntu:18.04", "ubuntu:20.04" ] + - IMAGE : [ "ubuntu:cuda" ] CXX: [ g++ ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] From 0c0722930ed47aeff718a3b0295a6d3229a910c8 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 9 Oct 2022 11:08:10 -0400 Subject: [PATCH 22/30] no need to install cuda when using valeevgroup/ubuntu:cuda image --- ci/.build-project | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ci/.build-project b/ci/.build-project index 44b208242d..1b4e9dc749 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -79,15 +79,8 @@ if [[ "$vars" =~ \"-DBLAS_PREFERENCE_LIST=IntelMKL ]]; then cmd "echo MKLROOT=\$MKLROOT" fi if [[ "$vars" =~ \"-D([a-zA-Z]+_)?ENABLE_CUDA=(ON|TRUE|1|YES)\" ]]; then - cmd "make -C /home/ValeevGroup install/cuda" - cmd "rm -fr /usr/local/bin/nvcc" cmd "export CUDACXX=/usr/local/cuda/bin/nvcc" cmd "${CUDACXX} -V" - # this will be moved to image builder - cmd "sudo apt-get -yq update" - cmd "sudo apt-get -yq install nvidia-utils-510" - cmd "find / -name \"*nvidia-smi\"" - cmd "nvidia-smi" fi section_end preparing_system_section From 1d73c641a6d9879de4dc42303f1d45aa5d8a669e Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 10 Oct 2022 12:33:16 -0400 Subject: [PATCH 23/30] [ci] invoke nvidia-smi to dump driver + CUDA info to the log --- .gitlab-ci.yml | 3 ++- ci/.build-project | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7df07712c6..e42ca6fa79 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,7 +6,8 @@ default: interruptible: true variables: - MAD_NUM_THREADS : 2 + MAD_NUM_THREADS : "2" + DOCKER_GPUS : "all" TA_TARGETS : "tiledarray examples-tiledarray ta_test check-tiledarray" # Debug builds with ScaLAPACK=ON need increased TA_UT_CTEST_TIMEOUT TA_CONFIG : > diff --git a/ci/.build-project b/ci/.build-project index 1b4e9dc749..6159653d2e 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -71,8 +71,6 @@ cmd "cmake -P ci/host_system_info.cmake" section_end host_system_info section_start "preparing_system_section[collapsed=true]" "Preparing system" -cmd "source ci/openmpi.env" -cmd "echo 'localhost slots=2' > /etc/openmpi/openmpi-default-hostfile" if [[ "$vars" =~ \"-DBLAS_PREFERENCE_LIST=IntelMKL ]]; then cmd "make -C /home/ValeevGroup install/intel-mkl" cmd "source /opt/intel/mkl/bin/mklvars.sh intel64" @@ -81,7 +79,13 @@ fi if [[ "$vars" =~ \"-D([a-zA-Z]+_)?ENABLE_CUDA=(ON|TRUE|1|YES)\" ]]; then cmd "export CUDACXX=/usr/local/cuda/bin/nvcc" cmd "${CUDACXX} -V" + cmd "lspci" + cmd "env" + cmd "ls -l /usr/bin" + cmd "/usr/bin/nvidia-smi" fi +cmd "source ci/openmpi.env" +cmd "echo 'localhost slots=2' > /etc/openmpi/openmpi-default-hostfile" section_end preparing_system_section section_start configure_section "Configure" From 08df8697793fd1b314546d4ccd0a450784ac89a8 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 11 Oct 2022 17:24:08 -0400 Subject: [PATCH 24/30] Revert "[ci] invoke nvidia-smi to dump driver + CUDA info to the log" This reverts commit 1fc764c712c23405b828a45e94a19907b08da5ef. --- .gitlab-ci.yml | 3 +-- ci/.build-project | 8 ++------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e42ca6fa79..7df07712c6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,8 +6,7 @@ default: interruptible: true variables: - MAD_NUM_THREADS : "2" - DOCKER_GPUS : "all" + MAD_NUM_THREADS : 2 TA_TARGETS : "tiledarray examples-tiledarray ta_test check-tiledarray" # Debug builds with ScaLAPACK=ON need increased TA_UT_CTEST_TIMEOUT TA_CONFIG : > diff --git a/ci/.build-project b/ci/.build-project index 6159653d2e..1b4e9dc749 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -71,6 +71,8 @@ cmd "cmake -P ci/host_system_info.cmake" section_end host_system_info section_start "preparing_system_section[collapsed=true]" "Preparing system" +cmd "source ci/openmpi.env" +cmd "echo 'localhost slots=2' > /etc/openmpi/openmpi-default-hostfile" if [[ "$vars" =~ \"-DBLAS_PREFERENCE_LIST=IntelMKL ]]; then cmd "make -C /home/ValeevGroup install/intel-mkl" cmd "source /opt/intel/mkl/bin/mklvars.sh intel64" @@ -79,13 +81,7 @@ fi if [[ "$vars" =~ \"-D([a-zA-Z]+_)?ENABLE_CUDA=(ON|TRUE|1|YES)\" ]]; then cmd "export CUDACXX=/usr/local/cuda/bin/nvcc" cmd "${CUDACXX} -V" - cmd "lspci" - cmd "env" - cmd "ls -l /usr/bin" - cmd "/usr/bin/nvidia-smi" fi -cmd "source ci/openmpi.env" -cmd "echo 'localhost slots=2' > /etc/openmpi/openmpi-default-hostfile" section_end preparing_system_section section_start configure_section "Configure" From 4e0ce094d4709f060250df9147e21a50b185603b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 11 Oct 2022 17:24:08 -0400 Subject: [PATCH 25/30] Revert "no need to install cuda when using valeevgroup/ubuntu:cuda image" This reverts commit 15e1d5bc3e694547bae6f8a3c671ce9cb89e9380. --- ci/.build-project | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ci/.build-project b/ci/.build-project index 1b4e9dc749..44b208242d 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -79,8 +79,15 @@ if [[ "$vars" =~ \"-DBLAS_PREFERENCE_LIST=IntelMKL ]]; then cmd "echo MKLROOT=\$MKLROOT" fi if [[ "$vars" =~ \"-D([a-zA-Z]+_)?ENABLE_CUDA=(ON|TRUE|1|YES)\" ]]; then + cmd "make -C /home/ValeevGroup install/cuda" + cmd "rm -fr /usr/local/bin/nvcc" cmd "export CUDACXX=/usr/local/cuda/bin/nvcc" cmd "${CUDACXX} -V" + # this will be moved to image builder + cmd "sudo apt-get -yq update" + cmd "sudo apt-get -yq install nvidia-utils-510" + cmd "find / -name \"*nvidia-smi\"" + cmd "nvidia-smi" fi section_end preparing_system_section From dd1273255356f354f78ffa2026c5ee82998da849 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 11 Oct 2022 17:24:08 -0400 Subject: [PATCH 26/30] Revert "try using valeevgroup/ubuntu:cuda image" This reverts commit 2223c8ed294d59a459ec20bc1313522b8c24862b. --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7df07712c6..6ab502b527 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -66,7 +66,7 @@ ubuntu: CXX: [ g++, clang++-9 ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] - - IMAGE : [ "ubuntu:cuda" ] + - IMAGE : [ "ubuntu:18.04", "ubuntu:20.04" ] CXX: [ g++ ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] From 7a7bc76f40f0683660774a1795e20916960bb61f Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 11 Oct 2022 17:45:37 -0400 Subject: [PATCH 27/30] [ci] define RUNNER_TAGS so that only CUDA jobs end up on our local runner --- .gitlab-ci.yml | 7 ++++++- ci/.build-project | 4 ---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6ab502b527..4708470e0f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -28,7 +28,9 @@ before_script: ubuntu: stage: build - tags: [ docker ] + tags: + - docker + - ${RUNNER_TAGS} timeout: 3h image: valeevgroup/${IMAGE} variables: @@ -62,12 +64,15 @@ ubuntu: BLA_THREADS : [ "IntelMKL_THREAD_LAYER=tbb" ] # ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] TA_PYTHON : [ "TA_PYTHON=OFF" ] # needs to be fixed for MKL + RUNNER_TAGS: [ linux ] - IMAGE : [ "ubuntu:18.04", "ubuntu:20.04" ] CXX: [ g++, clang++-9 ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] + RUNNER_TAGS: [ linux ] - IMAGE : [ "ubuntu:18.04", "ubuntu:20.04" ] CXX: [ g++ ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] TA_TARGETS : [ "tiledarray examples-tiledarray check_serial-tiledarray" ] + RUNNER_TAGS: [ cuda ] diff --git a/ci/.build-project b/ci/.build-project index 44b208242d..1e1596a6f9 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -83,10 +83,6 @@ if [[ "$vars" =~ \"-D([a-zA-Z]+_)?ENABLE_CUDA=(ON|TRUE|1|YES)\" ]]; then cmd "rm -fr /usr/local/bin/nvcc" cmd "export CUDACXX=/usr/local/cuda/bin/nvcc" cmd "${CUDACXX} -V" - # this will be moved to image builder - cmd "sudo apt-get -yq update" - cmd "sudo apt-get -yq install nvidia-utils-510" - cmd "find / -name \"*nvidia-smi\"" cmd "nvidia-smi" fi section_end preparing_system_section From caba336489bea75ac2998462f814f11dc0a5f4ea Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 13 Oct 2022 13:33:23 -0400 Subject: [PATCH 28/30] cleanup --- tests/expressions_cuda_um.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/expressions_cuda_um.cpp b/tests/expressions_cuda_um.cpp index e5e810e29d..a17b749789 100644 --- a/tests/expressions_cuda_um.cpp +++ b/tests/expressions_cuda_um.cpp @@ -528,15 +528,6 @@ BOOST_AUTO_TEST_CASE(scal_add_block) { 2 * (3 * a("a,b,c").block({3, 3, 3}, {5, 5, 5}) + 4 * b("a,b,c").block({3, 3, 3}, {5, 5, 5}))); - std::cout << "expr tree for c(\"a,b,c\") =\n" - " 2 * (3 * a(\"a,b,c\").block({3, " - "3, 3}, {5, 5, 5}) +\n" - " 4 * b(\"a,b,c\").block({3, " - "3, 3}, {5, 5, 5})):\n" - << c("a,b,c") - << 2 * (3 * a("a,b,c").block({3, 3, 3}, {5, 5, 5}) + - 4 * b("a,b,c").block({3, 3, 3}, {5, 5, 5})); - for (std::size_t index = 0ul; index < block_range.volume(); ++index) { if (!a.is_zero(block_range.ordinal(index)) && !b.is_zero(block_range.ordinal(index))) { @@ -998,10 +989,6 @@ BOOST_AUTO_TEST_CASE(scale_add_permute) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = 5 * (2 * a("c,b,a")) + (3 * b("a,b,c"))); - std::cout << "expr tree for c(\"a,b,c\") = 5 * (2 * a(\"c,b,a\")) + (3 * " - "b(\"a,b,c\")))" - << c("a,b,c") << (5 * (2 * a("c,b,a")) + (3 * b("a,b,c"))); - for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); const size_t perm_index = From 77c95ff3b2000cc11e575acb1a0f25d3b5f9ecec Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 13 Oct 2022 13:34:36 -0400 Subject: [PATCH 29/30] hush warnings re use of Eigen's Tensor header when CUDA enabled --- src/TiledArray/external/eigen.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/TiledArray/external/eigen.h b/src/TiledArray/external/eigen.h index 6ee0eaea3f..cd2c50b522 100644 --- a/src/TiledArray/external/eigen.h +++ b/src/TiledArray/external/eigen.h @@ -46,7 +46,14 @@ TILEDARRAY_PRAGMA_GCC(system_header) #endif #include + +// disable warnings re: ignored attributes on template argument +// Eigen::PacketType::type +// {aka __vector(2) long long int} +TILEDARRAY_PRAGMA_GCC(diagnostic push) +TILEDARRAY_PRAGMA_GCC(diagnostic ignored "-Wignored-attributes") #include +TILEDARRAY_PRAGMA_GCC(diagnostic pop) #if defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_LAPACKE_STRICT) #if !EIGEN_VERSION_AT_LEAST(3, 3, 7) From 6bd84aa461d53d7ad266a1d3ed9b786549de0165 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 13 Oct 2022 16:37:02 -0400 Subject: [PATCH 30/30] [ci] try using Ninja and multiple cores --- .gitlab-ci.yml | 5 ++--- ci/.build-project | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4708470e0f..93850215f1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -20,9 +20,8 @@ variables: ${ENABLE_SCALAPACK} before_script: - # NB: below tag parsing is not robust - - echo "CI_RUNNER_TAGS=$CI_RUNNER_TAGS" - - CMAKE_BUILD_PARALLEL_LEVEL=$(echo $CI_RUNNER_TAGS | sed -n 's/CMAKE_BUILD_PARALLEL_LEVEL=\([0-9]\+\).*/\1/p') + # NB: if CMAKE_BUILD_PARALLEL_LEVEL is not set (i.e. using shared runner), use 1 to ensure we have enough memory + # TODO optimize ta_test build memory consumption - export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:=1} - echo "CMAKE_BUILD_PARALLEL_LEVEL=$CMAKE_BUILD_PARALLEL_LEVEL" diff --git a/ci/.build-project b/ci/.build-project index 1e1596a6f9..aeb7c73787 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -89,7 +89,7 @@ section_end preparing_system_section section_start configure_section "Configure" cmd mkdir -p ${build_dir} -time_cmd configure "cmake -B${build_dir} $vars" +time_cmd configure "cmake -GNinja -B${build_dir} $vars" section_end configure_section for target in ${targets}; do