diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 11f03acf70..93850215f1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,12 +7,12 @@ default: variables: MAD_NUM_THREADS : 2 - TA_TARGETS : "tiledarray examples ta_test check-tiledarray" + TA_TARGETS : "tiledarray examples-tiledarray ta_test check-tiledarray" # Debug builds with ScaLAPACK=ON need increased TA_UT_CTEST_TIMEOUT TA_CONFIG : > CMAKE_BUILD_TYPE=${BUILD_TYPE} TA_ASSERT_POLICY=TA_ASSERT_THROW - TA_UT_CTEST_TIMEOUT=2000 + TA_UT_CTEST_TIMEOUT=3000 ${TA_PYTHON} ${ENABLE_CUDA} ${BLA_VENDOR} @@ -20,15 +20,16 @@ variables: ${ENABLE_SCALAPACK} before_script: - # NB: below tag parsing is not robust - - echo "CI_RUNNER_TAGS=$CI_RUNNER_TAGS" - - CMAKE_BUILD_PARALLEL_LEVEL=$(echo $CI_RUNNER_TAGS | sed -n 's/CMAKE_BUILD_PARALLEL_LEVEL=\([0-9]\+\).*/\1/p') + # NB: if CMAKE_BUILD_PARALLEL_LEVEL is not set (i.e. using shared runner), use 1 to ensure we have enough memory + # TODO optimize ta_test build memory consumption - export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:=1} - echo "CMAKE_BUILD_PARALLEL_LEVEL=$CMAKE_BUILD_PARALLEL_LEVEL" ubuntu: stage: build - tags: [ docker ] + tags: + - docker + - ${RUNNER_TAGS} timeout: 3h image: valeevgroup/${IMAGE} variables: @@ -62,12 +63,15 @@ ubuntu: BLA_THREADS : [ "IntelMKL_THREAD_LAYER=tbb" ] # ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] TA_PYTHON : [ "TA_PYTHON=OFF" ] # needs to be fixed for MKL + RUNNER_TAGS: [ linux ] - IMAGE : [ "ubuntu:18.04", "ubuntu:20.04" ] CXX: [ g++, clang++-9 ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] + RUNNER_TAGS: [ linux ] - IMAGE : [ "ubuntu:18.04", "ubuntu:20.04" ] CXX: [ g++ ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] - TA_TARGETS : [ "tiledarray examples" ] + TA_TARGETS : [ "tiledarray examples-tiledarray check_serial-tiledarray" ] + RUNNER_TAGS: [ cuda ] diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0bf6535c4a..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,111 +0,0 @@ -# See http://about.travis-ci.org/docs/user/build-configuration/ -# To validate this file: http://lint.travis-ci.org/ - -language: cpp -dist: focal -cache: ccache -cache: - directories: - - /home/travis/_install -os: linux - -addons: - apt: - packages: &base_packages - - libblas-dev - - liblapack-dev - - liblapacke-dev - - libtbb-dev - - lcov - - python3 - - python3-pip - - python3-pytest - - python3-numpy - -env: - global: - - BUILD_PREFIX=/home/travis/_build - - INSTALL_PREFIX=/home/travis/_install - -matrix: - fast_finish: true - include: - - compiler: gcc - env: GCC_VERSION=7 BUILD_TYPE=Debug MADNESS_OVER_PARSEC=1 - addons: - apt: - packages: - - *base_packages - - g++-7 - - gfortran-7 - - compiler: gcc - env: GCC_VERSION=7 BUILD_TYPE=Debug - addons: - apt: - packages: - - *base_packages - - g++-7 - - gfortran-7 - - compiler: gcc - env: GCC_VERSION=7 BUILD_TYPE=Release - addons: - apt: - packages: - - *base_packages - - g++-7 - - gfortran-7 - - compiler: gcc - env: GCC_VERSION=8 BUILD_TYPE=Debug COMPUTE_COVERAGE=1 MADNESS_OVER_PARSEC=1 - addons: - apt: - packages: - - *base_packages - - g++-8 - - gfortran-8 - - compiler: gcc - env: GCC_VERSION=8 BUILD_TYPE=Release - addons: - apt: - packages: - - *base_packages - - g++-8 - - gfortran-8 - - compiler: gcc - env: GCC_VERSION=9 BUILD_TYPE=Debug MADNESS_OVER_PARSEC=1 - addons: - apt: - sources: - - sourceline: 'ppa:ubuntu-toolchain-r/test' - packages: - - *base_packages - - g++-9 - - 
gfortran-9 - -before_install: - - env - - mkdir -p ${BUILD_PREFIX} && mkdir -p ${INSTALL_PREFIX} -# use timeout to stop long-running (i.e. cache-rebuilding) jobs right before they get killed by Travis-CI -# in case of timeout report success to Travis to force cache upload -script: - - travis_wait 50 timeout 2850 ${TRAVIS_BUILD_DIR}/bin/build-$TRAVIS_OS_NAME.sh; RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 124 ]; then true; else false; fi; -after_failure: - - cat ${BUILD_PREFIX}/TA/external/madness-build/CMakeFiles/CMakeError.log - - cat ${BUILD_PREFIX}/TA/external/madness-build/CMakeFiles/CMakeOutput.log - - cat ${BUILD_PREFIX}/TA/CMakeFiles/CMakeError.log - - cat ${BUILD_PREFIX}/TA/CMakeFiles/CMakeOutput.log -# codecov -after_success: - # create report - - cd ${TRAVIS_BUILD_DIR} - - if [ "$COMPUTE_COVERAGE" = "1" ]; then lcov --gcov-tool gcov-${GCC_VERSION} --directory ${BUILD_PREFIX}/TA --capture --output-file coverage.info; fi; # capture coverage info - - if [ "$COMPUTE_COVERAGE" = "1" ]; then lcov --remove coverage.info '/usr/*' '*/madness/*' '*/btas/*' '*/tests/*' --output-file coverage.info; fi; # filter out non-project files - - if [ "$COMPUTE_COVERAGE" = "1" ]; then lcov --list coverage.info; fi; #debug info - - echo ${TRAVIS_CMD} - # upload report to CodeCov - - if [ "$COMPUTE_COVERAGE" = "1" ]; then bash <(curl -s https://codecov.io/bash) -t token; fi; - # deploy artifacts: currently only dox - - if [ "$DEPLOY" = "1" ]; then bash ${TRAVIS_BUILD_DIR}/bin/deploy-$TRAVIS_OS_NAME.sh; fi; - -notifications: - slack: - secure: aSmy6FmiEf+0gcbVpJs0GIrmpI1dF7/WFOXgUkM2wLxw5DBQxE4LW/yt01mvFqAMJLe0LzGujx/V/z98i0kA1S8DEMTqJ+IG2bbdmgb5CAw5LTP5Air1P2SeAyKW/eAAsnGsERaEnHj8nnZEa2dhbAFOPD5QDM7nwWG/xUkIGMU= diff --git a/CMakeLists.txt b/CMakeLists.txt index 382f4abb56..ed419d8f20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -374,14 +374,18 @@ add_subdirectory(doc) ########################## include(CTest) if (BUILD_TESTING) - set(_ctest_args -V -R "tiledarray/unit") + set(_ctest_args -V -R "tiledarray/unit/run-np.*") + set(_ctest_args_serial -V -R "tiledarray/unit/run-np-1") if (DEFINED TA_UT_CTEST_TIMEOUT) list(APPEND _ctest_args --timeout ${TA_UT_CTEST_TIMEOUT}) + list(APPEND _ctest_args_serial --timeout ${TA_UT_CTEST_TIMEOUT}) endif(DEFINED TA_UT_CTEST_TIMEOUT) add_custom_target_subproject(tiledarray check USES_TERMINAL COMMAND ${CMAKE_CTEST_COMMAND} ${_ctest_args}) + add_custom_target_subproject(tiledarray check_serial USES_TERMINAL COMMAND ${CMAKE_CTEST_COMMAND} ${_ctest_args_serial}) add_subdirectory(tests) else() add_custom_target_subproject(tiledarray check USES_TERMINAL COMMAND echo "WARNING: unit testing disabled. To enable, give -DBUILD_TESTING=ON to cmake") + add_custom_target_subproject(tiledarray check_serial USES_TERMINAL COMMAND echo "WARNING: unit testing disabled. To enable, give -DBUILD_TESTING=ON to cmake") endif() ########################## diff --git a/INSTALL.md b/INSTALL.md index 0c3b843d3e..229f76d7d0 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -64,11 +64,11 @@ Compiling BTAS requires the following prerequisites: Optional prerequisites: - [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on CUDA-enabled accelerators. CUDA 11 or later is required. 
Support for CUDA also requires the following additional prerequisites, both of which will be built and installed automatically if missing: - - [cuTT](github.com/ValeevGroup/cutt) -- CUDA transpose library; note that our fork of the [original cuTT repo](github.com/ap-hynninen/cutt) is required to provide thread-safety (tag 0e8685bf82910bc7435835f846e88f1b39f47f09). + - [LibreTT](github.com/victor-anisimov/LibreTT) -- free tensor transpose library for CUDA, HIP, and SYCL platforms that is based on the [original cuTT library](github.com/ap-hynninen/cutt) extended to provide thread-safety improvements (via github.com/ValeevGroup/cutt) and extended to non-CUDA platforms by [@victor-anisimov](github.com/victor-anisimov) (tag 68abe31a9ec6fd2fd9ffbcd874daa80457f947da). - [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag f9640e0fa4245691cdd434e4f719ac5f7d455f82). - [Doxygen](http://www.doxygen.nl/) -- for building documentation (version 1.8.12 or later). - [ScaLAPACK](http://www.netlib.org/scalapack/) -- a distributed-memory linear algebra package. If detected, the following C++ components will also be sought and downloaded, if missing: - - [scalapackpp](https://github.com/wavefunction91/scalapackpp.git) -- a modern C++ (C++17) wrapper for ScaLAPACK (tag 28433942197aee141cd9e96ed1d00f6ec7b902cb); pulls and builds the following additional prerequisite + - [scalapackpp](https://github.com/wavefunction91/scalapackpp.git) -- a modern C++ (C++17) wrapper for ScaLAPACK (tag 711ef363479a90c88788036f9c6c8adb70736cbf); pulls and builds the following additional prerequisite - [blacspp](https://github.com/wavefunction91/blacspp.git) -- a modern C++ (C++17) wrapper for BLACS - Python3 interpreter -- to test (optionally-built) Python bindings - [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20; only used for some unit testing of the functionality anticipated to be supported by future C++ standards. @@ -330,7 +330,7 @@ Support for execution on CUDA-enabled hardware is controlled by the following va * `ENABLE_CUDA` -- Set to `ON` to turn on CUDA support. [Default=OFF]. * `CMAKE_CUDA_HOST_COMPILER` -- Set to the path to the host C++ compiler to be used by CUDA compiler. CUDA compilers used to be notorious for only being able to use specific C++ host compilers, but support for more recent C++ host compilers has improved. The default is determined by the CUDA compiler and the user environment variables (`PATH` etc.). * `ENABLE_CUDA_ERROR_CHECK` -- Set to `ON` to turn on assertions for successful completion of calls to CUDA runtime and libraries. [Default=OFF]. -* `CUTT_INSTALL_DIR` -- the installation prefix of the pre-installed cuTT library. This should not be normally needed; it is strongly recommended to let TiledArray build and install cuTT. +* `LIBRETT_INSTALL_DIR` -- the installation prefix of the pre-installed LibreTT library. This should not be normally needed; it is strongly recommended to let TiledArray build and install LibreTT. * `UMPIRE_INSTALL_DIR` -- the installation prefix of the pre-installed Umpire library. This should not be normally needed; it is strongly recommended to let TiledArray build and install Umpire. For the CUDA compiler and toolkit to be discoverable the CUDA compiler (`nvcc`) should be in the `PATH` environment variable. Refer to the [FindCUDAToolkit module](https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html) for more info. 
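A minimal sketch of a CUDA-enabled configure line using the cache variables documented above; the build directory, host-compiler path, and install prefixes are hypothetical placeholders, and the two *_INSTALL_DIR hints are only needed when pointing at pre-installed copies of LibreTT/Umpire (by default TiledArray builds and installs both itself):

    # nvcc must be discoverable via PATH (see the FindCUDAToolkit note above)
    cmake -S . -B build \
      -DENABLE_CUDA=ON \
      -DENABLE_CUDA_ERROR_CHECK=ON \
      -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-9 \
      -DLIBRETT_INSTALL_DIR=/opt/librett \
      -DUMPIRE_INSTALL_DIR=/opt/umpire
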
diff --git a/README.md b/README.md index 853629526a..8742d1e774 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -[![Travis Build Status](https://travis-ci.com/ValeevGroup/tiledarray.svg?branch=master)](https://travis-ci.com/ValeevGroup/tiledarray) [![Gitlab Pipeline Status](https://gitlab.com/ValeevGroup/tiledarray/badges/master/pipeline.svg)](https://gitlab.com/ValeevGroup/tiledarray/-/pipelines?page=1&scope=all&ref=master) [![codecov](https://codecov.io/gh/ValeevGroup/tiledarray/branch/master/graph/badge.svg)](https://codecov.io/gh/ValeevGroup/tiledarray) diff --git a/bin/admin/dependency-versions-update-hook.py b/bin/admin/dependency-versions-update-hook.py index 19b7123703..686b98b49a 100755 --- a/bin/admin/dependency-versions-update-hook.py +++ b/bin/admin/dependency-versions-update-hook.py @@ -106,11 +106,11 @@ def replace_dep_id(topsrc, file_ext, dep_name, old_id, new_id, search_prefix = ' btas_old_tag = tokens[2] else: btas_new_tag = tokens[2] - elif tokens[1].find('CUTT') != -1: + elif tokens[1].find('LIBRETT') != -1: if tokens[1].find('PREVIOUS') != -1: - cutt_old_tag = tokens[2] + librett_old_tag = tokens[2] else: - cutt_new_tag = tokens[2] + librett_new_tag = tokens[2] elif tokens[1].find('UMPIRE') != -1: if tokens[1].find('PREVIOUS') != -1: umpire_old_tag = tokens[2] @@ -146,8 +146,8 @@ def replace_dep_id(topsrc, file_ext, dep_name, old_id, new_id, search_prefix = ' # BTAS tag in INSTALL.md any_files_changed |= replace_dep_id(topsrc, 'md', 'BTAS', btas_old_tag, btas_new_tag, 'ValeevGroup/BTAS), tag ', '') -# cuTT tag in INSTALL.md -any_files_changed |= replace_dep_id(topsrc, 'md', 'cuTT', cutt_old_tag, cutt_new_tag, '', '') +# LibreTT tag in INSTALL.md +any_files_changed |= replace_dep_id(topsrc, 'md', 'LibreTT', librett_old_tag, librett_new_tag, '', '') # Umpire tag in INSTALL.md any_files_changed |= replace_dep_id(topsrc, 'md', 'Umpire', umpire_old_tag, umpire_new_tag, '', '') diff --git a/bin/build-boost-linux.sh b/bin/build-boost-linux.sh deleted file mode 100755 index 7c4fca8bbf..0000000000 --- a/bin/build-boost-linux.sh +++ /dev/null @@ -1,41 +0,0 @@ -#! /bin/sh - -export BOOST_VERSION=1_74_0 - -# Exit on error -set -ev - -if [ "$CXX" = "g++" ]; then - export CXX=/usr/bin/g++-$GCC_VERSION - export CXXFLAGS="-mno-avx" - export BOOST_TOOLSET=gcc -else - export CXX=/usr/bin/clang++-$CLANG_VERSION - export CXXFLAGS="-mno-avx -stdlib=libc++" - export BOOST_TOOLSET=clang -fi - -if [ "X$BUILD_TYPE" = "XDebug" ]; then - export BOOST_VARIANT="debug" -else - export BOOST_VARIANT="release" -fi - -# download+unpack (but not build!) Boost unless previous install is cached ... must manually wipe cache on version bump or toolchain update -export INSTALL_DIR=${INSTALL_PREFIX}/boost -if [ ! -d "${INSTALL_DIR}" ]; then - rm -fr boost_${BOOST_VERSION}.tar.bz2 - wget https://boostorg.jfrog.io/artifactory/main/release/1.74.0/source/boost_${BOOST_VERSION}.tar.bz2 - tar -xjf boost_${BOOST_VERSION}.tar.bz2 - cd boost_${BOOST_VERSION} - cat > user-config.jam << END -using ${BOOST_TOOLSET} : : ${CXX} : - "${CXXFLAGS}" - "${CXXFLAGS}" ; -END - ./bootstrap.sh --prefix=${INSTALL_DIR} --with-libraries=serialization - ./b2 -d0 --user-config=`pwd`/user-config.jam toolset=${BOOST_TOOLSET} link=static variant=${BOOST_VARIANT} - ./b2 -d0 install -else - echo "Boost already installed ..." -fi diff --git a/bin/build-eigen3-linux.sh b/bin/build-eigen3-linux.sh deleted file mode 100755 index 5f2133111b..0000000000 --- a/bin/build-eigen3-linux.sh +++ /dev/null @@ -1,42 +0,0 @@ -#! 
/bin/sh - -# Exit on error -set -ev - -# Install packages - -# Environment variables -if [ "$CXX" = "g++" ]; then - export CC=/usr/bin/gcc-$GCC_VERSION - export CXX=/usr/bin/g++-$GCC_VERSION - export EXTRACXXFLAGS="-mno-avx" -else - export CC=/usr/bin/clang-$CLANG_VERSION - export CXX=/usr/bin/clang++-$CLANG_VERSION - export EXTRACXXFLAGS="-mno-avx -stdlib=libc++" -fi - -# Print compiler information -$CC --version -$CXX --version - -# log the CMake version (need 3+) -cmake --version - -# Install Eigen3 unless previous install is cached ... must manually wipe cache on version bump or toolchain update -export INSTALL_DIR=${INSTALL_PREFIX}/eigen3 -if [ ! -d "${INSTALL_DIR}" ]; then - cd ${BUILD_PREFIX} - wget -q https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.bz2 - tar -xjf eigen-3.3.7.tar.bz2 - cd eigen-* - mkdir build - cd build - cmake .. -DCMAKE_CXX_COMPILER=$CXX \ - -DCMAKE_C_COMPILER=$CC \ - -DCMAKE_CXX_FLAGS="${EXTRACXXFLAGS}" \ - -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} - make install -else - echo "Eigen3 already installed ..." -fi diff --git a/bin/build-linux.sh b/bin/build-linux.sh deleted file mode 100755 index a6c55ed951..0000000000 --- a/bin/build-linux.sh +++ /dev/null @@ -1,147 +0,0 @@ -#! /bin/sh - -# get the most recent cmake available -if [ ! -d "${INSTALL_PREFIX}/cmake" ]; then - CMAKE_VERSION=3.17.0 - CMAKE_URL="https://cmake.org/files/v${CMAKE_VERSION%.[0-9]}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz" - mkdir ${INSTALL_PREFIX}/cmake && wget --no-check-certificate -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C ${INSTALL_PREFIX}/cmake -fi -export PATH=${INSTALL_PREFIX}/cmake/bin:${PATH} -cmake --version - -export PYTHON_EXECUTABLE=$(which python3) -export TA_PYTHON=ON - -${TRAVIS_BUILD_DIR}/bin/build-mpich-linux.sh -${TRAVIS_BUILD_DIR}/bin/build-scalapack-mpich-linux.sh -${TRAVIS_BUILD_DIR}/bin/build-madness-linux.sh -${TRAVIS_BUILD_DIR}/bin/build-boost-linux.sh -${TRAVIS_BUILD_DIR}/bin/build-eigen3-linux.sh - -# Exit on error -set -ev - -# download latest Doxygen -if [ "$DEPLOY" = "1" ]; then - DOXYGEN_VERSION=1.8.20 - if [ ! 
-d ${INSTALL_PREFIX}/doxygen-${DOXYGEN_VERSION} ]; then - cd ${BUILD_PREFIX} && wget https://downloads.sourceforge.net/project/doxygen/rel-${DOXYGEN_VERSION}/doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz - cd ${INSTALL_PREFIX} && tar xzf ${BUILD_PREFIX}/doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz - fi - export PATH=${INSTALL_PREFIX}/doxygen-${DOXYGEN_VERSION}/bin:$PATH - which doxygen - doxygen --version -fi - -# Environment variables -if [ "$CXX" = "g++" ]; then - export CC=/usr/bin/gcc-$GCC_VERSION - export CXX=/usr/bin/g++-$GCC_VERSION - export EXTRACXXFLAGS="-mno-avx" - # if linking statically will need fortran libs to detect liblapacke.a in BTAS - export F77=gfortran-$GCC_VERSION -else - export CC=/usr/bin/clang-$CLANG_VERSION - export CXX=/usr/bin/clang++-$CLANG_VERSION - export EXTRACXXFLAGS="-mno-avx -stdlib=libc++" - # if linking statically will need fortran libs to detect liblapacke.a in BTAS - export F77=gfortran-$GCC_VERSION -fi - -export MPI_HOME=${INSTALL_PREFIX}/mpich -export MPICC=$MPI_HOME/bin/mpicc -export MPICXX=$MPI_HOME/bin/mpicxx -export LD_LIBRARY_PATH=/usr/lib/lapack:/usr/lib/libblas:${INSTALL_PREFIX}/scalapack/lib:$LD_LIBRARY_PATH - -# list the prebuilt prereqs -ls -l ${INSTALL_PREFIX} - -# where to install TA (need for testing installed code) -export INSTALL_DIR=${INSTALL_PREFIX}/TA - -# make build dir -cd ${BUILD_PREFIX} -mkdir -p TA -cd TA - -# if have old installed copy of TA, make sure that BTAS tag matches the required tag, if not, remove INSTALL_DIR (will cause rebuild of TA) -if [ -f "${INSTALL_DIR}/include/btas/version.h" ]; then - export INSTALLED_BTAS_TAG=`grep 'define BTAS_REVISION' ${INSTALL_DIR}/include/btas/version.h | awk '{print $3}' | sed s/\"//g` - echo "installed BTAS revision = ${INSTALLED_BTAS_TAG}" - # extract the tracked tag of BTAS - export BTAS_TAG=`grep 'set(TA_TRACKED_BTAS_TAG ' ${TRAVIS_BUILD_DIR}/external/versions.cmake | awk '{print $2}' | sed s/\)//g` - echo "required BTAS revision = ${BTAS_TAG}" - if [ "${BTAS_TAG}" != "${INSTALLED_BTAS_TAG}" ]; then - rm -rf "${INSTALL_DIR}" - fi -fi - -# MADNESS are build separately if $BUILD_TYPE=Debug, otherwise built as part of TA -if [ "$BUILD_TYPE" = "Debug" ]; then - - if [ "$COMPUTE_COVERAGE" = "1" ]; then - export CODECOVCXXFLAGS="-O0 --coverage" - fi - - cmake ${TRAVIS_BUILD_DIR} \ - -DCMAKE_TOOLCHAIN_FILE=cmake/vg/toolchains/travis.cmake \ - -DCMAKE_CXX_COMPILER=$CXX \ - -DCMAKE_C_COMPILER=$CC \ - -DCMAKE_Fortran_COMPILER=$F77 \ - -DMPI_CXX_COMPILER=$MPICXX \ - -DMPI_C_COMPILER=$MPICC \ - -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ - -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DCMAKE_CXX_FLAGS="-ftemplate-depth=1024 -Wno-unused-command-line-argument ${EXTRACXXFLAGS} ${CODECOVCXXFLAGS}" \ - -DCMAKE_PREFIX_PATH="${INSTALL_PREFIX}/madness;${INSTALL_PREFIX}/eigen3;${INSTALL_PREFIX}/boost" \ - -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \ - -DTA_PYTHON="${TA_PYTHON}" \ - -DENABLE_SCALAPACK=ON - -else - - # if have old installed copy of TA, make sure that MADNESS tag matches the required tag, if not, remove INSTALL_DIR (will cause rebuild of MADNESS) - if [ -f "${INSTALL_DIR}/include/madness/config.h" ]; then - export INSTALLED_MADNESS_TAG=`grep 'define MADNESS_REVISION' ${INSTALL_DIR}/include/madness/config.h | awk '{print $3}' | sed s/\"//g` - echo "installed MADNESS revision = ${INSTALLED_MADNESS_TAG}" - # extract the tracked tag of MADNESS - export MADNESS_TAG=`grep 'set(TA_TRACKED_MADNESS_TAG ' ${TRAVIS_BUILD_DIR}/external/versions.cmake | awk '{print $2}' | sed s/\)//g` - echo "required MADNESS 
revision = ${MADNESS_TAG}" - if [ "${MADNESS_TAG}" != "${INSTALLED_MADNESS_TAG}" ]; then - rm -rf "${INSTALL_DIR}" - fi - fi - - cmake ${TRAVIS_BUILD_DIR} \ - -DCMAKE_TOOLCHAIN_FILE=cmake/vg/toolchains/travis.cmake \ - -DCMAKE_CXX_COMPILER=$CXX \ - -DCMAKE_C_COMPILER=$CC \ - -DCMAKE_Fortran_COMPILER=$F77 \ - -DMPI_CXX_COMPILER=$MPICXX \ - -DMPI_C_COMPILER=$MPICC \ - -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ - -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DCMAKE_CXX_FLAGS="-ftemplate-depth=1024 -Wno-unused-command-line-argument ${EXTRACXXFLAGS}" \ - -DCMAKE_PREFIX_PATH="${INSTALL_PREFIX}/eigen3;${INSTALL_PREFIX}/boost" \ - -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \ - -DTA_PYTHON="${TA_PYTHON}" \ - -DTA_ASSERT_POLICY=TA_ASSERT_THROW \ - -DENABLE_SCALAPACK=ON - -fi - -# Build all libraries, examples, and applications -make -j2 all VERBOSE=1 -make install -# remove install dir to avoid broken artifacts like BTAS polluting the next build via cached copy -rm -rf $INSTALL_DIR - -# Validate -make -j1 ta_test VERBOSE=1 -export MAD_NUM_THREADS=2 -# to find dep shared libs (do we need this since El is gone?) -export LD_LIBRARY_PATH=${INSTALL_PREFIX}/TA/lib:${INSTALL_PREFIX}/madness/lib:${LD_LIBRARY_PATH} -make check-tiledarray - -# Build examples -make -j2 examples VERBOSE=1 diff --git a/bin/build-madness-linux.sh b/bin/build-madness-linux.sh deleted file mode 100755 index d255bff92d..0000000000 --- a/bin/build-madness-linux.sh +++ /dev/null @@ -1,85 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -# Will build MADNESS stand-alone for Debug builds only -if [ "$BUILD_TYPE" = "Debug" ]; then - - # Environment variables - if [ "$CXX" = "g++" ]; then - export CC=/usr/bin/gcc-$GCC_VERSION - export CXX=/usr/bin/g++-$GCC_VERSION - export EXTRACXXFLAGS="-mno-avx" - export F77=gfortran-$GCC_VERSION - else - export CC=/usr/bin/clang-$CLANG_VERSION - export CXX=/usr/bin/clang++-$CLANG_VERSION - export EXTRACXXFLAGS="-mno-avx -stdlib=libc++" - export F77=gfortran-$GCC_VERSION - fi - - export MPI_HOME=${INSTALL_PREFIX}/mpich - export MPICC=$MPI_HOME/bin/mpicc - export MPICXX=$MPI_HOME/bin/mpicxx - export LD_LIBRARY_PATH=/usr/lib/lapack:/usr/lib/libblas:$LD_LIBRARY_PATH - - # list the prebuilt prereqs - ls -l ${INSTALL_PREFIX} - - # where to install MADNESS (need for testing installed code) - export INSTALL_DIR=${INSTALL_PREFIX}/madness - - # extract the tracked tag of MADNESS - export MADNESS_TAG=`grep 'set(TA_TRACKED_MADNESS_TAG ' ${TRAVIS_BUILD_DIR}/external/versions.cmake | awk '{print $2}' | sed s/\)//g` - echo "required MADNESS revision = ${MADNESS_TAG}" - - # make sure installed MADNESS tag matches the required tag, if not, remove INSTALL_DIR (will cause reinstall) - if [ -f "${INSTALL_DIR}/include/madness/config.h" ]; then - export INSTALLED_MADNESS_TAG=`grep 'define MADNESS_REVISION' ${INSTALL_DIR}/include/madness/config.h | awk '{print $3}' | sed s/\"//g` - echo "installed MADNESS revision = ${INSTALLED_MADNESS_TAG}" - if [ "${MADNESS_TAG}" != "${INSTALLED_MADNESS_TAG}" ]; then - rm -rf "${INSTALL_DIR}" - fi - fi - - if [ ! -d "${INSTALL_DIR}" ]; then - - # make build dir - cd ${BUILD_PREFIX} - mkdir -p madness - cd madness - - if [ -n "${MADNESS_OVER_PARSEC}" ]; then - MADNESS_BACKEND_OPTION="-DMADNESS_TASK_BACKEND=PaRSEC" - fi - - # check out the tracked tag of MADNESS - git clone https://github.com/TESSEorg/madness.git madness_src && cd madness_src && git checkout ${MADNESS_TAG} && cd .. 
- - cmake madness_src \ - -DCMAKE_TOOLCHAIN_FILE="${TRAVIS_BUILD_DIR}/cmake/toolchains/travis.cmake" \ - -DCMAKE_CXX_COMPILER=$CXX \ - -DCMAKE_C_COMPILER=$CC \ - -DMPI_CXX_COMPILER=$MPICXX \ - -DMPI_C_COMPILER=$MPICC \ - -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ - -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DCMAKE_CXX_FLAGS="-ftemplate-depth=1024 -Wno-unused-command-line-argument ${EXTRACXXFLAGS}" \ - -DMADNESS_BUILD_MADWORLD_ONLY=ON \ - -DENABLE_MPI=ON \ - -DMPI_THREAD=multiple \ - -DENABLE_TBB=OFF \ - -DTBB_ROOT_DIR=/usr \ - -DFORTRAN_INTEGER_SIZE=4 \ - -DENABLE_LIBXC=OFF \ - -DENABLE_GPERFTOOLS=OFF \ - -DASSERTION_TYPE=throw \ - -DDISABLE_WORLD_GET_DEFAULT=ON \ - ${MADNESS_BACKEND_OPTION} - - # Build+install MADworld interface - make -j2 install VERBOSE=1 - fi - -fi diff --git a/bin/build-mpich-linux.sh b/bin/build-mpich-linux.sh deleted file mode 100755 index 7e38ef3167..0000000000 --- a/bin/build-mpich-linux.sh +++ /dev/null @@ -1,42 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -# Install packages - -# always use gcc to compile MPICH, there are unexplained issues with clang (e.g. MPI_Barrier aborts) -export CC=/usr/bin/gcc-$GCC_VERSION -export CXX=/usr/bin/g++-$GCC_VERSION -export FC=/usr/bin/gfortran-$GCC_VERSION - -# Print compiler information -$CC --version -$CXX --version -$FC --version - -# log the CMake version (need 3+) -cmake --version - -# Install MPICH unless previous install is cached ... must manually wipe cache on version bump or toolchain update -export INSTALL_DIR=${INSTALL_PREFIX}/mpich -if [ ! -d "${INSTALL_DIR}" ]; then - cd ${BUILD_PREFIX} - export MPICH_VERSION=3.3 - wget --no-check-certificate -q http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz - tar -xzf mpich-${MPICH_VERSION}.tar.gz - cd mpich-${MPICH_VERSION} - ./configure FC=$FC CC=$CC CXX=$CXX --prefix=${INSTALL_DIR} - make -j2 - make install - ${INSTALL_DIR}/bin/mpichversion - ${INSTALL_DIR}/bin/mpicc -show - ${INSTALL_DIR}/bin/mpicxx -show - ${INSTALL_DIR}/bin/mpifort -show -else - echo "MPICH installed..." - find ${INSTALL_DIR} -name mpiexec - find ${INSTALL_DIR} -name mpicc - find ${INSTALL_DIR} -name mpicxx - find ${INSTALL_DIR} -name mpifort -fi diff --git a/bin/build-scalapack-mpich-linux.sh b/bin/build-scalapack-mpich-linux.sh deleted file mode 100755 index 213d7bc5a7..0000000000 --- a/bin/build-scalapack-mpich-linux.sh +++ /dev/null @@ -1,48 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -# always use gcc, just like mpich ... ? -export CC=/usr/bin/gcc-$GCC_VERSION -export CXX=/usr/bin/g++-$GCC_VERSION -export FC=/usr/bin/gfortran-$GCC_VERSION - -# Print compiler information -$CC --version -$CXX --version -$FC --version - -# log the CMake version (need 3+) -cmake --version - -# Install MPICH unless previous install is cached ... must manually wipe cache on version bump or toolchain update -export INSTALL_DIR=${INSTALL_PREFIX}/scalapack -if [ ! -d "${INSTALL_DIR}" ]; then - - # Make sure MPI is built - ${INSTALL_PREFIX}/mpich/bin/mpichversion - ${INSTALL_PREFIX}/mpich/bin/mpicc -show - ${INSTALL_PREFIX}/mpich/bin/mpicxx -show - ${INSTALL_PREFIX}/mpich/bin/mpif90 -show - - cd ${BUILD_PREFIX} - git clone https://github.com/Reference-ScaLAPACK/scalapack.git - cd scalapack - git checkout 0efeeb6d2ec9faf0f2fd6108de5eda60773cdcf9 # checked revision - cmake -H. 
-Bbuild_scalapack \ - -DCMAKE_C_COMPILER=$CC \ - -DCMAKE_Fortran_COMPILER=$FC \ - -DMPI_C_COMPILER=${INSTALL_PREFIX}/mpich/bin/mpicc \ - -DMPI_Fortran_COMPILER=${INSTALL_PREFIX}/mpich/bin/mpif90 \ - -DCMAKE_TOOLCHAIN_FILE="${TRAVIS_BUILD_DIR}/cmake/toolchains/travis.cmake" \ - -DCMAKE_PREFIX_PATH=${INSTALL_DIR} \ - -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} - - cmake --build build_scalapack -j2 - cmake --build build_scalapack --target install - find ${INSTALL_DIR} -name libscalapack.so -else - echo "ScaLAPACK installed..." - find ${INSTALL_DIR} -name libscalapack.so -fi diff --git a/bin/deploy-linux.sh b/bin/deploy-linux.sh deleted file mode 100755 index 279a8f69e8..0000000000 --- a/bin/deploy-linux.sh +++ /dev/null @@ -1,62 +0,0 @@ -#! /bin/sh - -# Exit on error -set -ev - -git config --global user.email "travis@travis-ci.org" -git config --global user.name "Travis CI" - -# only non-cron job deploys -RUN=1 -if [ "$TRAVIS_EVENT_TYPE" = "cron" ] || [ "$TRAVIS_BRANCH" != "master" ]; then - RUN=0 -fi -if [ "$RUN" = "0" ]; then - echo "Deployment skipped" - exit 0 -fi - -# deploy from the build area -cd ${BUILD_PREFIX}/TA - -### deploy docs -# see https://gist.github.com/willprice/e07efd73fb7f13f917ea - -# build docs -export VERBOSE=1 -cmake --build . --target html -if [ ! -f "${BUILD_PREFIX}/TA/doc/dox/html/index.html" ]; then - echo "Target html built successfully but did not produce index.html" - exit 1 -fi - -# check out current docs + template -git clone --depth=1 https://github.com/ValeevGroup/tiledarray.git --branch gh-pages --single-branch tiledarray-docs-current -git clone --depth=1 https://github.com/ValeevGroup/tiledarray.git --branch gh-pages-template --single-branch tiledarray-docs-template -mkdir tiledarray-docs -cp -rp tiledarray-docs-current/* tiledarray-docs -rm -rf tiledarray-docs-current -cp -rp tiledarray-docs-template/* tiledarray-docs -rm -rf tiledarray-docs-template -cd tiledarray-docs -# copy TA's README.md into index.md -cp ${TRAVIS_BUILD_DIR}/README.md index.md -# update dox -if [ -d dox-master ]; then - rm -rf dox-master -fi -mv ${BUILD_PREFIX}/TA/doc/dox/html dox-master -# Jekyll does not allow files with "special" names, e.g. whose names start with underscore -# must "include" such files explicitly -# re: how file names must be formatted: see https://github.com/jekyll/jekyll/issues/1352 -echo "include:" >> _config.yml -find dox-master -name "_*" | sed "s/dox-master\// \- /g" >> _config.yml -# make empty repo to ensure gh-pages contains no history -git init -git add * -git commit -a -q -m "rebuilt TA master docs via Travis build: $TRAVIS_BUILD_NUMBER" -git checkout -b gh-pages -git remote add origin https://${GH_TILEDARRAY_TOKEN}@github.com/ValeevGroup/tiledarray.git > /dev/null 2>&1 -git push origin +gh-pages --force -cd .. -rm -rf tiledarray-docs diff --git a/bin/docker-cuda.md b/bin/docker-cuda.md index a525369070..0f39c0ac20 100644 --- a/bin/docker-cuda.md +++ b/bin/docker-cuda.md @@ -1,5 +1,5 @@ # Intro -These notes describe how to build TiledArray with CUDA support enabled within the latest nvidia/cuda Docker image (https://hub.docker.com/r/nvidia/cuda/). This is useful for experimentation and/or provisioning computational results (e.g. for creating supplementary info for a journal article). If you want to use Docker to run/debug Travis-CI jobs, see [docker-travis.md](docker-travis.md) +These notes describe how to build TiledArray with CUDA support enabled within the latest nvidia/cuda Docker image (https://hub.docker.com/r/nvidia/cuda/). 
This is useful for experimentation and/or provisioning computational results (e.g. for creating supplementary info for a journal article). # Using These notes assume that Docker 19.03 and NVIDIA Container Toolkit (https://github.com/NVIDIA/nvidia-docker) are installed on your machine and that you start at the top of the TiledArray source tree. diff --git a/bin/docker-travis-build.sh b/bin/docker-travis-build.sh deleted file mode 100755 index 4209bad9ef..0000000000 --- a/bin/docker-travis-build.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/bash - -# this script builds a 'Bionic' env docker image used by Travis-CI for TiledArray project -# -# to run bash in the image: docker run -it tiledarray-travis-debug bash -l -# see docker-travis.md for further instructions -# N.B. relevant locations: -# - source dir: /home/travis/build/ValeevGroup/tiledarray (TRAVIS_BUILD_DIR env in Travis jobs) -# - build dir: /home/travis/_build -# - install dir: /home/travis/_install - -# this is where in the container file system Travis-CI "starts" -export TRAVIS_BUILD_TOPDIR=/home/travis/build -export DIRNAME=`dirname $0` -export ABSDIRNAME=`pwd $DIRNAME` - -############################################################## -# make a script to download all prereqs and clone TiledArray repo -setup=setup.sh -cat > $setup << END -#!/bin/sh -curl -sSL "http://apt.llvm.org/llvm-snapshot.gpg.key" | apt-key add - -echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal main" | tee -a /etc/apt/sources.list > /dev/null -apt-add-repository -y "ppa:ubuntu-toolchain-r/test" -apt-get -yq update >> ~/apt-get-update.log -apt-get -yq --no-install-suggests --no-install-recommends --force-yes install g++-7 g++-8 g++-9 gfortran-7 gfortran-8 gfortran-9 libblas-dev liblapack-dev liblapacke-dev libtbb-dev clang-8 clang-9 cmake cmake-data libclang1-9 graphviz fonts-liberation \ -python3 python3-pip python3-pytest python3-numpy -mkdir -p ${TRAVIS_BUILD_TOPDIR} -cd ${TRAVIS_BUILD_TOPDIR} -git clone https://github.com/ValeevGroup/tiledarray.git ${TRAVIS_BUILD_TOPDIR}/ValeevGroup/tiledarray -END -chmod +x $setup - -############################################################## -# make a script to build all extra prereqs once in the container -build=build.sh -cat > $build << END -#!/bin/sh -cd /home/travis/_build -export BUILD_PREFIX=/home/travis/_build -export INSTALL_PREFIX=/home/travis/_install -export TRAVIS_BUILD_DIR=${TRAVIS_BUILD_TOPDIR}/ValeevGroup/tiledarray -export TRAVIS_EVENT_TYPE=cron -export TRAVIS_OS_NAME=linux -\${TRAVIS_BUILD_DIR}/bin/build-\$TRAVIS_OS_NAME.sh -END -chmod +x $build - -############################################################## -# make Dockerfile -cat > Dockerfile << END -# Travis default 'Focal' image -FROM travisci/ci-ubuntu-2004:packer-1609444725-e5de6974 - -# Use baseimage-docker's init system. -CMD ["/sbin/my_init"] - -# create source, build, and install dirs -RUN mkdir -p /home/travis/_build -RUN mkdir -p /home/travis/_install - -# install prereqs -ADD $setup /home/travis/_build/$setup -RUN /home/travis/_build/$setup - -# Clean up APT when done. -RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -# copy travis scripts -ADD $build /home/travis/_build/$build - -# for further info ... -RUN echo "\e[92mDone! 
For info on how to use the image refer to $ABSDIRNAME/docker-travis.md\e[0m" - -END - -function clean_up { - rm -f $setup $build Dockerfile - exit -} - -trap clean_up SIGHUP SIGINT SIGTERM - -############################################################## -# build a dev image -docker build -t tiledarray-travis-debug . - -############################################################## -# extra admin tasks, uncomment as needed - -############################################################## -# done -clean_up diff --git a/bin/docker-travis.md b/bin/docker-travis.md deleted file mode 100644 index 65e43632df..0000000000 --- a/bin/docker-travis.md +++ /dev/null @@ -1,33 +0,0 @@ -# Intro -These notes describe how to build TiledArray within the latest Travis-CI Docker image. This is useful for debugging Travis-CI jobs on your local machine. -# Using -These notes assume that Docker is installed on your machine and that you start at the top of the TiledArray source tree. - -## Create/build Docker Travis image -1. Create a Travis-CI docker image: `cd bin; ./docker-travis-build.sh` -2. Run a container using the newly created image: `docker run -it tiledarray-travis-debug bash -l` -3. `cd /home/travis/_build` -4. Configure the job to use the appropriate compiler, compiler version, and debug/release build type: - * `export BUILD_TYPE=B`, where `B` is `Debug` or `Release`. - * If want to use GNU C++ compiler (gcc): - * `export GCC_VERSION=VVV` where `VVV` should be the GCC version to be used. The currently valid values are `7`, `8` and `9`. - * `export CXX=g++` - * If want to use Clang C++ compiler (clang++): - * `export GCC_VERSION=8` - * `export CLANG_VERSION=VVV` where `VVV` should be the Clang version to be used. The currently valid values is `11`. - * `export CXX=clang++` - * `apt-get update && apt-get install libc++-${CLANG_VERSION}-dev libc++abi-${CLANG_VERSION}-dev` -5. Build prerequisites (MPICH, MADNESS, ScaLAPACK), TiledArray, and run tests: `./build.sh` - -## Notes -* According to [Travis-CI docs](https://docs.travis-ci.com/user/reference/overview/) you want to configure your Docker to run containers with 2 cores and 7.5 GB of RAM to best match the production environment. -* If you plan to use this container multiple times it might make sense to take a snapshot at this point to avoid having to recompile the prerequisites each and every time. Store it as a separate image, e.g. `docker commit container_id tiledarray-travis-debug:clang-debug`, where `container_id` can be found in the output of `docker ps`. Next time to start debugging you will need to pull updates to the TiledArray source (do `cd /home/travis/build/ValeevGroup/tiledarray && git pull`), then execute step 2 with the new image name, execute step 3, and go directly to step 6. -* To install `gdb` execute `apt-get update && apt-get install gdb`. Also, it appears that to be able to attach `gdb` or any other debugger to a running process you must run the Docker container in privileged mode as `docker run --privileged -it tiledarray-travis-debug:clang-debug bash -l`. -* To debug parallel jobs you want to launch jobs in a gdb in an xterm. To run xterm you need to ssh into the container. To start an ssh server in the container do this: - * Connect sshd's port of the container (22) to an unprivileged port (say, 2222) of the host: `docker run -p 127.0.0.1:2222:22 --privileged -it tiledarray-travis-debug:clang-debug bash -l` - * Generate host keys: `ssh-keygen -A` - * Create a root password: `passwd` and follow prompts. 
No need to be fancy: security is not a concern here, but `passwd` will not accept an empty password. N.B. This is easier than setting up a pubkey login, so don't bother with that. - * Edit `/etc/ssh/sshd_config` and allow root to log in by ensuring that `PermitRootLogin` and `PasswordAuthentication` are set to `yes`. - * Start ssh server: `/etc/init.d/ssh start` - * (optional) To launch gdb in xterm windows: `apt-get update && apt-get install xterm` - * You should be able to log in from an xterm on the host side: `ssh -Y -p 2222 root@localhost` diff --git a/bin/docker.md b/bin/docker.md index fb558db6db..1826c95ef2 100644 --- a/bin/docker.md +++ b/bin/docker.md @@ -1,5 +1,5 @@ # Intro -These notes describe how to build TiledArray within the latest phusion (https://github.com/phusion/baseimage-docker) Docker image. This is useful for experimentation and/or provisioning computational results (e.g. for creating supplementary info for a journal article). If you want to use Docker to run/debug Travis-CI jobs, see [docker-travis.md](docker-travis.md) +These notes describe how to build TiledArray within the latest phusion (https://github.com/phusion/baseimage-docker) Docker image. This is useful for experimentation and/or provisioning computational results (e.g. for creating supplementary info for a journal article). # Using These notes assume that Docker is installed on your machine and that you start at the top of the TiledArray source tree. diff --git a/ci/.build-project b/ci/.build-project index 79a08d541b..aeb7c73787 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -80,13 +80,16 @@ if [[ "$vars" =~ \"-DBLAS_PREFERENCE_LIST=IntelMKL ]]; then fi if [[ "$vars" =~ \"-D([a-zA-Z]+_)?ENABLE_CUDA=(ON|TRUE|1|YES)\" ]]; then cmd "make -C /home/ValeevGroup install/cuda" + cmd "rm -fr /usr/local/bin/nvcc" cmd "export CUDACXX=/usr/local/cuda/bin/nvcc" + cmd "${CUDACXX} -V" + cmd "nvidia-smi" fi section_end preparing_system_section section_start configure_section "Configure" cmd mkdir -p ${build_dir} -time_cmd configure "cmake -B${build_dir} $vars" +time_cmd configure "cmake -GNinja -B${build_dir} $vars" section_end configure_section for target in ${targets}; do diff --git a/cmake/modules/FindOrFetchBTAS.cmake b/cmake/modules/FindOrFetchBTAS.cmake index 57a4b94ac0..764ec7046e 100644 --- a/cmake/modules/FindOrFetchBTAS.cmake +++ b/cmake/modules/FindOrFetchBTAS.cmake @@ -13,9 +13,9 @@ if (NOT TARGET BTAS::BTAS) # BTAS will load BLAS++/LAPACK++ ... if those use CMake's FindBLAS/FindLAPACK (as indicated by defined BLA_VENDOR) # will need to specify Fortran linkage convention ... manually for now, switching to NWX's linear algebra discovery # is necessary to handle all the corner cases for automatic discovery - if (DEFINED BLA_VENDOR) + if (BLA_VENDOR) set(_linalgpp_use_standard_linalg_kits TRUE) - endif(DEFINED BLA_VENDOR) + endif(BLA_VENDOR) if (NOT TILEDARRAY_HAS_CUDA) # tell BLAS++/LAPACK++ to ignore CUDA diff --git a/doc/dox/contrib/Travis-CI-Administration-Notes.md b/doc/dox/contrib/Travis-CI-Administration-Notes.md index 0b626507cd..0284ebf0b9 100644 --- a/doc/dox/contrib/Travis-CI-Administration-Notes.md +++ b/doc/dox/contrib/Travis-CI-Administration-Notes.md @@ -1,13 +1,5 @@ -# Managing Travis Builds {#Travis-CI-Administration-Notes} +# Managing CI Builds {#CI-Administration-Notes} ## Basic Facts -* Travis CI configuration is in file `.travis.yml`, and build scripts are in `bin/build-*linux.sh`. Only Linux builds are currently supported. 
-* `BUILD_TYPE=Debug` jobs build and install MADNESS separately, before building TiledArray' `BUILD_TYPE=Release` jobs build MADNESS as a step of the TiledArray build. -* MPICH and (`BUILD_TYPE=Debug` only) MADNESS installation directories are _cached_. **Build scripts only verify the presence of installed directories, and do not update them if their configuration (e.g. static vs. shared, or code version) has changed. _Thus it is admin's responsibility to manually wipe out the cache on a per-branch basis_.** It is the easiest to do via the Travis-CI web interface (click on 'More Options' menu at the top right, select 'Caches', etc.). -* Rebuilding cache of prerequisites may take more time than the job limit (50 mins at the moment), so rebuilding cache can take several attempts. Since Travis-CI does not support forced cache updates (see e.g. https://github.com/travis-ci/travis-ci/issues/6410) if the job looks like it's going to time out we report success to Travis just so that it will store cache. __Thus jobs that timed out will be falsely reported as successful (rather than errored)!__ When rebuilding cache it may be necessary to manually restart some build jobs to make sure that cache rebuild is complete (or, just to be sure, restart the whole __build__ one time just to be sure all caches have been rebuilt). Again: this is only relevant when rebuilding caches (i.e. <5% of the time), otherwise there should be no need to restart jobs manually. - -# Debugging Travis-CI jobs - -## Local debugging - -Follow the instructions contained in [docker-travis.md](https://github.com/ValeevGroup/tiledarray/blob/master/bin/docker-travis.md) . +* TiledArray only uses GitLab CI at this point +* CI configuration is in file `.gitlab-ci.yml`, and build metadata is in `ci/`. Only Linux builds are currently supported. 
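The `check_serial-tiledarray` entry added to TA_TARGETS in `.gitlab-ci.yml` above corresponds to the `check_serial` custom target added in CMakeLists.txt. A minimal sketch of reproducing that CI step locally, assuming an already-configured build tree in a hypothetical `build` directory:

    cmake --build build --target check_serial-tiledarray
    # or run only the single-process unit tests directly, mirroring
    # _ctest_args_serial and the CI value of TA_UT_CTEST_TIMEOUT
    cd build && ctest -V -R "tiledarray/unit/run-np-1" --timeout 3000
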
diff --git a/examples/cc/ccd.cpp b/examples/cc/ccd.cpp index 18106f34c1..2560048d26 100644 --- a/examples/cc/ccd.cpp +++ b/examples/cc/ccd.cpp @@ -96,27 +96,28 @@ int main(int argc, char** argv) { TiledArray::TSpArrayD t_aa_vvoo(world, v_aa_vvoo.trange(), v_aa_vvoo.shape()); - for (auto it = t_aa_vvoo.range().begin(); it != t_aa_vvoo.range().end(); - ++it) + for (auto it = t_aa_vvoo.tiles_range().begin(); + it != t_aa_vvoo.tiles_range().end(); ++it) if (t_aa_vvoo.is_local(*it) && (!t_aa_vvoo.is_zero(*it))) t_aa_vvoo.set(*it, 0.0); TiledArray::TSpArrayD t_ab_vvoo(world, v_ab_vvoo.trange(), v_ab_vvoo.shape()); - for (auto it = t_ab_vvoo.range().begin(); it != t_ab_vvoo.range().end(); - ++it) + for (auto it = t_ab_vvoo.tiles_range().begin(); + it != t_ab_vvoo.tiles_range().end(); ++it) if (t_ab_vvoo.is_local(*it) && (!t_ab_vvoo.is_zero(*it))) t_ab_vvoo.set(*it, 0.0); TiledArray::TSpArrayD t_bb_vvoo(world, v_bb_vvoo.trange(), v_bb_vvoo.shape()); - for (auto it = t_bb_vvoo.range().begin(); it != t_bb_vvoo.range().end(); - ++it) + for (auto it = t_bb_vvoo.tiles_range().begin(); + it != t_bb_vvoo.tiles_range().end(); ++it) if (t_bb_vvoo.is_local(*it) && (!t_bb_vvoo.is_zero(*it))) t_bb_vvoo.set(*it, 0.0); TiledArray::TSpArrayD D_vvoo(world, v_ab_vvoo.trange(), v_ab_vvoo.shape()); - for (auto it = D_vvoo.range().begin(); it != D_vvoo.range().end(); ++it) + for (auto it = D_vvoo.tiles_range().begin(); + it != D_vvoo.tiles_range().end(); ++it) if (D_vvoo.is_local(*it) && (!D_vvoo.is_zero(*it))) D_vvoo.set(*it, world.taskq.add(data, &InputData::make_D_vvoo_tile, D_vvoo.trange().make_tile_range(*it))); diff --git a/examples/cc/ccsd.cpp b/examples/cc/ccsd.cpp index 47a29686fa..f06b53edf1 100644 --- a/examples/cc/ccsd.cpp +++ b/examples/cc/ccsd.cpp @@ -128,15 +128,16 @@ int main(int argc, char** argv) { // // // TArray2s D_vo(world, f_a_vo.trange(), f_a_vo.shape()); - // for(TArray2s::range_type::const_iterator it = D_vo.range().begin(); it - // != D_vo.range().end(); ++it) + // for(TArray2s::range_type::const_iterator it = + // D_vo.tiles_range().begin(); it + // != D_vo.tiles_range().end(); ++it) // if(D_vo.is_local(*it) && (! D_vo.is_zero(*it))) // D_vo.set(*it, world.taskq.add(data, & InputData::make_D_vo_tile, // D_vo.trange().make_tile_range(*it))); // // TArray4s D_vvoo(world, v_ab_vvoo.trange(), v_ab_vvoo.shape()); - // for(TArray4s::range_type::const_iterator it = D_vvoo.range().begin(); - // it != D_vvoo.range().end(); ++it) + // for(TArray4s::range_type::const_iterator it = + // D_vvoo.tiles_range().begin(); it != D_vvoo.tiles_range().end(); ++it) // if(D_vvoo.is_local(*it) && (! 
D_vvoo.is_zero(*it))) // D_vvoo.set(*it, world.taskq.add(data, & // InputData::make_D_vvoo_tile, diff --git a/examples/cuda/CMakeLists.txt b/examples/cuda/CMakeLists.txt index 2f6affe700..5d7f56c86e 100644 --- a/examples/cuda/CMakeLists.txt +++ b/examples/cuda/CMakeLists.txt @@ -25,7 +25,7 @@ if(CUDA_FOUND) - foreach(_exec cuda_cutt cuda_task ta_dense_cuda ta_cc_abcd_cuda ta_vector_cuda ta_reduce_cuda) + foreach(_exec cuda_librett cuda_task ta_dense_cuda ta_cc_abcd_cuda ta_vector_cuda ta_reduce_cuda) # Add executable add_ta_executable(${_exec} "${_exec}.cpp" "tiledarray") diff --git a/examples/cuda/cuda_cutt.cpp b/examples/cuda/cuda_librett.cpp similarity index 98% rename from examples/cuda/cuda_cutt.cpp rename to examples/cuda/cuda_librett.cpp index edaefc2597..a916bfc729 100644 --- a/examples/cuda/cuda_cutt.cpp +++ b/examples/cuda/cuda_librett.cpp @@ -29,7 +29,7 @@ #include /** - * Test cuTT + * Test LibreTT */ const std::size_t N = 100; diff --git a/examples/cuda/ta_cc_abcd_cuda.cpp b/examples/cuda/ta_cc_abcd_cuda.cpp index c67895f7dc..6a2ef26e5f 100644 --- a/examples/cuda/ta_cc_abcd_cuda.cpp +++ b/examples/cuda/ta_cc_abcd_cuda.cpp @@ -60,7 +60,7 @@ int main(int argc, char** argv) { try { // Initialize runtime - TA::World& world = TA::initialize(argc, argv); + TA::World& world = TA_SCOPED_INITIALIZE(argc, argv); // Get command line arguments if (argc < 5) { @@ -136,9 +136,6 @@ int main(int argc, char** argv) { } else { cc_abcd(world, trange_occ, trange_uocc, repeat); } - - TA::finalize(); - } catch (TA::Exception& e) { std::cerr << "!! TiledArray exception: " << e.what() << "\n"; rc = 1; diff --git a/examples/cuda/ta_dense_cuda.cpp b/examples/cuda/ta_dense_cuda.cpp index 51ebc67b11..14f692329b 100644 --- a/examples/cuda/ta_dense_cuda.cpp +++ b/examples/cuda/ta_dense_cuda.cpp @@ -300,7 +300,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, int try_main(int argc, char **argv) { // Initialize runtime - TiledArray::World &world = TiledArray::initialize(argc, argv); + TiledArray::World &world = TA_SCOPED_INITIALIZE(argc, argv); // Get command line arguments if (argc < 6) { @@ -453,8 +453,6 @@ int try_main(int argc, char **argv) { throw std::runtime_error("Invalid storage type!\n"); } - TiledArray::finalize(); - return 0; } diff --git a/examples/cuda/ta_reduce_cuda.cpp b/examples/cuda/ta_reduce_cuda.cpp index 417fa2d72f..e453069892 100644 --- a/examples/cuda/ta_reduce_cuda.cpp +++ b/examples/cuda/ta_reduce_cuda.cpp @@ -62,6 +62,8 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, TiledArray::TiledRange // TRange trange(blocking.begin(), blocking.end()); + TiledArray::TiledRange trange_tr(blocking.rbegin(), + blocking.rend()); // transposed trange using value_type = typename Tile::value_type; using TArray = TA::DistArray; @@ -116,7 +118,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, } TArray a(world, trange); - TArray b(world, trange); + TArray b(world, trange_tr); a.fill(val_a); b.fill(val_b); @@ -198,7 +200,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, } TArray a(world, trange); - TArray b(world, trange); + TArray b(world, trange_tr); a.fill(val_a); b.fill(val_b); @@ -239,7 +241,7 @@ using cudaTile = TiledArray::Tile>; int try_main(int argc, char **argv) { // Initialize runtime - TiledArray::World &world = TiledArray::initialize(argc, argv); + TiledArray::World &world = TA_SCOPED_INITIALIZE(argc, argv); // Get command line arguments if (argc < 4) { @@ -365,8 +367,6 @@ int 
try_main(int argc, char **argv) { do_main_body>(world, Nm, Bm, Nn, Bn, nrepeat); } - TiledArray::finalize(); - return 0; } diff --git a/examples/cuda/ta_vector_cuda.cpp b/examples/cuda/ta_vector_cuda.cpp index f3c6265eb1..1593a68e8b 100644 --- a/examples/cuda/ta_vector_cuda.cpp +++ b/examples/cuda/ta_vector_cuda.cpp @@ -62,8 +62,9 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, blocking.push_back( TiledArray::TiledRange1(blocking_n.begin(), blocking_n.end())); - TiledArray::TiledRange // TRange - trange(blocking.begin(), blocking.end()); + TiledArray::TiledRange trange(blocking.begin(), blocking.end()); + TiledArray::TiledRange trange_tr(blocking.rbegin(), + blocking.rend()); // transposed trange using value_type = typename Tile::value_type; using TArray = TA::DistArray; @@ -150,7 +151,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, } TArray a(world, trange); - TArray b(world, trange); + TArray b(world, trange_tr); a.fill(val_a); b.fill(val_b); @@ -222,7 +223,7 @@ void do_main_body(TiledArray::World &world, const long Nm, const long Bm, } TArray a(world, trange); - TArray b(world, trange); + TArray b(world, trange_tr); a.fill(val_a); b.fill(val_b); @@ -258,7 +259,7 @@ using cudaTile = TiledArray::Tile>; int try_main(int argc, char **argv) { // Initialize runtime - TiledArray::World &world = TiledArray::initialize(argc, argv); + auto &world = TA_SCOPED_INITIALIZE(argc, argv); // Get command line arguments if (argc < 4) { @@ -384,8 +385,6 @@ int try_main(int argc, char **argv) { do_main_body>(world, Nm, Bm, Nn, Bn, nrepeat); } - TiledArray::finalize(); - return 0; } diff --git a/external/cuda.cmake b/external/cuda.cmake index 1e5ebd8d60..3b2eb6ce37 100644 --- a/external/cuda.cmake +++ b/external/cuda.cmake @@ -42,6 +42,6 @@ message(STATUS "CMAKE Implicit Link Directories: ${CMAKE_CUDA_IMPLICIT_LINK_DIRE include(external/umpire.cmake) ## -## cuTT +## LibreTT ## -include(external/cutt.cmake) +include(external/librett.cmake) diff --git a/external/cutt.cmake b/external/librett.cmake similarity index 53% rename from external/cutt.cmake rename to external/librett.cmake index dbf4e94f91..a238f3af92 100644 --- a/external/cutt.cmake +++ b/external/librett.cmake @@ -1,48 +1,48 @@ ## -## find cuTT +## find LibreTT ## -find_path(_CUTT_INSTALL_DIR NAMES include/cutt.h lib/libcutt.a HINTS ${CUTT_INSTALL_DIR}) +find_path(_LIBRETT_INSTALL_DIR NAMES include/librett.h lib/librett.a HINTS ${LIBRETT_INSTALL_DIR}) -if( _CUTT_INSTALL_DIR ) +if( _LIBRETT_INSTALL_DIR ) - message(STATUS "cuTT found at ${_CUTT_INSTALL_DIR}") + message(STATUS "LibreTT found at ${_LIBRETT_INSTALL_DIR}") elseif(TA_EXPERT) - message("** cuTT was not found") - message(STATUS "** Downloading and building cuTT is explicitly disabled in EXPERT mode") + message("** LibreTT was not found") + message(STATUS "** Downloading and building LibreTT is explicitly disabled in EXPERT mode") else() - # TODO need to fix the auto installation of cuTT + # TODO need to fix the auto installation of LibreTT include(ExternalProject) # to pass CMAKE_C_* vars to external project enable_language(C) - # set source and build path for cuTT in the TiledArray project - set(EXTERNAL_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/cutt-src) - # cutt only supports in source build - set(EXTERNAL_BUILD_DIR ${FETCHCONTENT_BASE_DIR}/cutt-build) + # set source and build path for LibreTT in the TiledArray project + set(EXTERNAL_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/librett-src) + # librett only supports in source build + 
set(EXTERNAL_BUILD_DIR ${FETCHCONTENT_BASE_DIR}/librett-build) set(EXTERNAL_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}) - if (NOT CUTT_URL) - set(CUTT_URL https://github.com/ValeevGroup/cutt.git) - endif (NOT CUTT_URL) - if (NOT CUTT_TAG) - set(CUTT_TAG ${TA_TRACKED_CUTT_TAG}) - endif (NOT CUTT_TAG) + if (NOT LIBRETT_URL) + set(LIBRETT_URL https://github.com/victor-anisimov/librett.git) + endif (NOT LIBRETT_URL) + if (NOT LIBRETT_TAG) + set(LIBRETT_TAG ${TA_TRACKED_LIBRETT_TAG}) + endif (NOT LIBRETT_TAG) - message("** Will clone cuTT from ${CUTT_URL}") + message("** Will clone LibreTT from ${LIBRETT_URL}") # need to change the separator of list to avoid issues with ExternalProject parsing # set(CUDA_FLAGS "${CUDA_NVCC_FLAGS}") # string(REPLACE ";" "::" CUDA_FLAGS "${CUDA_NVCC_FLAGS}") #message(STATUS "CUDA_FLAGS: " "${CUDA_FLAGS}") - set(CUTT_CMAKE_ARGS + set(LIBRETT_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_DIR} -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} -DCMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE} @@ -66,87 +66,88 @@ else() -DCMAKE_CUDA_STANDARD=${CMAKE_CUDA_STANDARD} -DCMAKE_CUDA_EXTENSIONS=${CMAKE_CUDA_EXTENSIONS} -DENABLE_UMPIRE=OFF - -DCUTT_USES_THIS_UMPIRE_ALLOCATOR=ThreadSafeUMDynamicPool + -DLIBRETT_USES_THIS_UMPIRE_ALLOCATOR=ThreadSafeUMDynamicPool -DCMAKE_PREFIX_PATH=${_UMPIRE_INSTALL_DIR} -DENABLE_NO_ALIGNED_ALLOC=ON -DCMAKE_CUDA_HOST_COMPILER=${CMAKE_CUDA_HOST_COMPILER} -DCUDA_TOOLKIT_ROOT_DIR=${CUDAToolkit_ROOT} + -DENABLE_CUDA=ON ) if (DEFINED CMAKE_CUDA_ARCHITECTURES) - list(APPEND CUTT_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}) + list(APPEND LIBRETT_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}) endif(DEFINED CMAKE_CUDA_ARCHITECTURES) if (CMAKE_TOOLCHAIN_FILE) - set(CUTT_CMAKE_ARGS "${CUTT_CMAKE_ARGS}" + set(LIBRETT_CMAKE_ARGS "${LIBRETT_CMAKE_ARGS}" "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") endif(CMAKE_TOOLCHAIN_FILE) if (BUILD_SHARED_LIBS) - set(CUTT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(LIBRETT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) else(BUILD_SHARED_LIBS) - set(CUTT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(LIBRETT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) endif(BUILD_SHARED_LIBS) # N.B. 
Ninja needs spelling out the byproducts of custom targets, see https://cmake.org/cmake/help/v3.3/policy/CMP0058.html - set(CUTT_BUILD_BYPRODUCTS "${EXTERNAL_BUILD_DIR}/src/libcutt${CUTT_DEFAULT_LIBRARY_SUFFIX}") - message(STATUS "custom target cutt is expected to build these byproducts: ${CUTT_BUILD_BYPRODUCTS}") + set(LIBRETT_BUILD_BYPRODUCTS "${EXTERNAL_BUILD_DIR}/src/librett${LIBRETT_DEFAULT_LIBRARY_SUFFIX}") + message(STATUS "custom target librett is expected to build these byproducts: ${LIBRETT_BUILD_BYPRODUCTS}") - ExternalProject_Add(cutt + ExternalProject_Add(librett PREFIX ${CMAKE_INSTALL_PREFIX} - STAMP_DIR ${FETCHCONTENT_BASE_DIR}/cutt-ep-artifacts - TMP_DIR ${FETCHCONTENT_BASE_DIR}/cutt-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable + STAMP_DIR ${FETCHCONTENT_BASE_DIR}/librett-ep-artifacts + TMP_DIR ${FETCHCONTENT_BASE_DIR}/librett-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable #--Download step-------------- DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} - GIT_REPOSITORY ${CUTT_URL} - GIT_TAG ${CUTT_TAG} + GIT_REPOSITORY ${LIBRETT_URL} + GIT_TAG ${LIBRETT_TAG} #--Configure step------------- SOURCE_DIR ${EXTERNAL_SOURCE_DIR} LIST_SEPARATOR :: UPDATE_DISCONNECTED 1 CMAKE_ARGS - ${CUTT_CMAKE_ARGS} + ${LIBRETT_CMAKE_ARGS} ${EXTERNAL_SOURCE_DIR} #--Build step----------------- BINARY_DIR ${EXTERNAL_BUILD_DIR} - BUILD_COMMAND ${CMAKE_COMMAND} --build . --target cutt -v - BUILD_BYPRODUCTS ${CUTT_BUILD_BYPRODUCTS} + BUILD_COMMAND ${CMAKE_COMMAND} --build . --target librett -v + BUILD_BYPRODUCTS ${LIBRETT_BUILD_BYPRODUCTS} #--Install step--------------- - INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "cuTT will be installed during TiledArray's installation." + INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "LibreTT will be installed during TiledArray's installation." 
#--Custom targets------------- STEP_TARGETS build ) - # TiledArray_CUTT target depends on existence of this directory to be usable from the build tree at configure time + # TiledArray_LIBRETT target depends on existence of this directory to be usable from the build tree at configure time execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${EXTERNAL_SOURCE_DIR}/src") - # do install of cuTT as part of building TiledArray's install target + # do install of LibreTT as part of building TiledArray's install target install(CODE "execute_process( COMMAND \"${CMAKE_COMMAND}\" \"--build\" \".\" \"--target\" \"install\" WORKING_DIRECTORY \"${EXTERNAL_BUILD_DIR}\" RESULT_VARIABLE error_code) if(error_code) - message(FATAL_ERROR \"Failed to install cuTT\") + message(FATAL_ERROR \"Failed to install LibreTT\") endif() ") - # Add cuTT dependency to External - add_dependencies(External-tiledarray cutt-build) + # Add LibreTT dependency to External + add_dependencies(External-tiledarray librett-build) - set(_CUTT_INSTALL_DIR ${EXTERNAL_INSTALL_DIR}) + set(_LIBRETT_INSTALL_DIR ${EXTERNAL_INSTALL_DIR}) -endif(_CUTT_INSTALL_DIR) +endif(_LIBRETT_INSTALL_DIR) -add_library(TiledArray_CUTT INTERFACE) +add_library(TiledArray_LIBRETT INTERFACE) -set_target_properties(TiledArray_CUTT +set_target_properties(TiledArray_LIBRETT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "$;$" + "$;$" INTERFACE_LINK_LIBRARIES - "$;$" + "$;$" ) -install(TARGETS TiledArray_CUTT EXPORT tiledarray COMPONENT tiledarray) +install(TARGETS TiledArray_LIBRETT EXPORT tiledarray COMPONENT tiledarray) -#TODO test cuTT +#TODO test LibreTT diff --git a/external/versions.cmake b/external/versions.cmake index 12b2746796..b2eb0b3719 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -27,8 +27,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) set(TA_TRACKED_BTAS_TAG f1d9eaeaf8f88f54defec991d34c7790c6c45bb2) set(TA_TRACKED_BTAS_PREVIOUS_TAG 240b49b033864b34d74f2b8d6dd55f2ab524eae3) -set(TA_TRACKED_CUTT_TAG 0e8685bf82910bc7435835f846e88f1b39f47f09) -set(TA_TRACKED_CUTT_PREVIOUS_TAG 592198b93c93b7ca79e7900b9a9f2e79f9dafec3) +set(TA_TRACKED_LIBRETT_TAG 68abe31a9ec6fd2fd9ffbcd874daa80457f947da) +set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 7e27ac766a9038df6aa05613784a54a036c4b796) set(TA_TRACKED_UMPIRE_TAG f9640e0fa4245691cdd434e4f719ac5f7d455f82) set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v6.0.0) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2578b4eb1a..c0b69b9b32 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -201,7 +201,7 @@ if(CUDA_FOUND) list(APPEND TILEDARRAY_HEADER_FILES TiledArray/external/cuda.h - TiledArray/external/cutt.h + TiledArray/external/librett.h TiledArray/cuda/cublas.h TiledArray/cuda/btas_cublas.h TiledArray/cuda/btas_um_tensor.h @@ -250,7 +250,7 @@ if(CUDA_FOUND) LANGUAGE CUDA) # the list of libraries on which TiledArray depends on - list(APPEND _TILEDARRAY_DEPENDENCIES CUDA::cublas CUDA::nvToolsExt TiledArray_CUTT) + list(APPEND _TILEDARRAY_DEPENDENCIES CUDA::cublas CUDA::nvToolsExt TiledArray_LIBRETT) endif(CUDA_FOUND) diff --git a/src/TiledArray/cuda/btas_um_tensor.h b/src/TiledArray/cuda/btas_um_tensor.h index d6012f00f1..7bddc4a178 100644 --- a/src/TiledArray/cuda/btas_um_tensor.h +++ b/src/TiledArray/cuda/btas_um_tensor.h @@ -32,7 +32,7 @@ #include #include -#include +#include #include namespace TiledArray { @@ -95,7 +95,8 @@ namespace TiledArray { /// gemm /// -template >> +template >> btasUMTensorVarray gemm( const btasUMTensorVarray &left, const btasUMTensorVarray &right, Scalar factor, @@ 
-103,7 +104,8 @@ btasUMTensorVarray gemm( return btas_tensor_gemm_cuda_impl(left, right, factor, gemm_helper); } -template >> +template >> void gemm(btasUMTensorVarray &result, const btasUMTensorVarray &left, const btasUMTensorVarray &right, Scalar factor, @@ -159,8 +161,8 @@ btasUMTensorVarray shift(const btasUMTensorVarray &arg, /// shift to /// template -btasUMTensorVarray& shift_to(btasUMTensorVarray &arg, - const Index &range_shift) { +btasUMTensorVarray &shift_to(btasUMTensorVarray &arg, + const Index &range_shift) { const_cast(arg.range()).inplace_shift(range_shift); return arg; } @@ -187,8 +189,8 @@ btasUMTensorVarray permute(const btasUMTensorVarray &arg, std::move(storage)); // invoke the permute function - cutt_permute(const_cast(device_data(arg.storage())), - device_data(result.storage()), arg.range(), perm, stream); + librett_permute(const_cast(device_data(arg.storage())), + device_data(result.storage()), arg.range(), perm, stream); synchronize_stream(&stream); @@ -199,24 +201,29 @@ btasUMTensorVarray permute(const btasUMTensorVarray &arg, /// scale /// -template >> +template >> btasUMTensorVarray scale(const btasUMTensorVarray &arg, const Scalar factor) { detail::to_cuda(arg); return btas_tensor_scale_cuda_impl(arg, factor); } -template >> -btasUMTensorVarray& scale_to(btasUMTensorVarray &arg, const Scalar factor) { +template >> +btasUMTensorVarray &scale_to(btasUMTensorVarray &arg, + const Scalar factor) { detail::to_cuda(arg); btas_tensor_scale_to_cuda_impl(arg, factor); return arg; } -template && TiledArray::detail::is_permutation_v>> +template < + typename T, typename Range, typename Scalar, typename Perm, + typename = std::enable_if_t && + TiledArray::detail::is_permutation_v>> btasUMTensorVarray scale(const btasUMTensorVarray &arg, - const Scalar factor, - const Perm &perm) { + const Scalar factor, const Perm &perm) { auto result = scale(arg, factor); // wait to finish before switch stream @@ -236,7 +243,9 @@ btasUMTensorVarray neg(const btasUMTensorVarray &arg) { return btas_tensor_scale_cuda_impl(arg, T(-1.0)); } -template >> +template < + typename T, typename Range, typename Perm, + typename = std::enable_if_t>> btasUMTensorVarray neg(const btasUMTensorVarray &arg, const Perm &perm) { auto result = neg(arg); @@ -249,7 +258,7 @@ btasUMTensorVarray neg(const btasUMTensorVarray &arg, } template -btasUMTensorVarray& neg_to(btasUMTensorVarray &arg) { +btasUMTensorVarray &neg_to(btasUMTensorVarray &arg) { detail::to_cuda(arg); btas_tensor_scale_to_cuda_impl(arg, T(-1.0)); return arg; @@ -267,7 +276,8 @@ btasUMTensorVarray subt(const btasUMTensorVarray &arg1, return btas_tensor_subt_cuda_impl(arg1, arg2, T(1.0)); } -template >> +template >> btasUMTensorVarray subt(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Scalar factor) { @@ -276,7 +286,9 @@ btasUMTensorVarray subt(const btasUMTensorVarray &arg1, return result; } -template >> +template < + typename T, typename Range, typename Perm, + typename = std::enable_if_t>> btasUMTensorVarray subt(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Perm &perm) { @@ -289,11 +301,13 @@ btasUMTensorVarray subt(const btasUMTensorVarray &arg1, return permute(result, perm); } -template && TiledArray::detail::is_permutation_v>> +template < + typename T, typename Scalar, typename Range, typename Perm, + typename = std::enable_if_t && + TiledArray::detail::is_permutation_v>> btasUMTensorVarray subt(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, - const Scalar factor, - const Perm 
&perm) { + const Scalar factor, const Perm &perm) { auto result = subt(arg1, arg2, factor); // wait to finish before switch stream @@ -308,17 +322,20 @@ btasUMTensorVarray subt(const btasUMTensorVarray &arg1, /// template -btasUMTensorVarray& subt_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg1) { +btasUMTensorVarray &subt_to( + btasUMTensorVarray &result, + const btasUMTensorVarray &arg1) { detail::to_cuda(result); detail::to_cuda(arg1); btas_tensor_subt_to_cuda_impl(result, arg1, T(1.0)); return result; } -template >> -btasUMTensorVarray& subt_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg1, const Scalar factor) { +template >> +btasUMTensorVarray &subt_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg1, + const Scalar factor) { subt_to(result, arg1); btas_tensor_scale_to_cuda_impl(result, factor); return result; @@ -336,7 +353,8 @@ btasUMTensorVarray add(const btasUMTensorVarray &arg1, return btas_tensor_add_cuda_impl(arg1, arg2, T(1.0)); } -template >> +template >> btasUMTensorVarray add(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Scalar factor) { @@ -345,11 +363,13 @@ btasUMTensorVarray add(const btasUMTensorVarray &arg1, return result; } -template && TiledArray::detail::is_permutation_v>> +template < + typename T, typename Scalar, typename Range, typename Perm, + typename = std::enable_if_t && + TiledArray::detail::is_permutation_v>> btasUMTensorVarray add(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, - const Scalar factor, - const Perm &perm) { + const Scalar factor, const Perm &perm) { auto result = add(arg1, arg2, factor); // wait to finish before switch stream @@ -359,7 +379,9 @@ btasUMTensorVarray add(const btasUMTensorVarray &arg1, return permute(result, perm); } -template >> +template < + typename T, typename Range, typename Perm, + typename = std::enable_if_t>> btasUMTensorVarray add(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Perm &perm) { @@ -377,17 +399,19 @@ btasUMTensorVarray add(const btasUMTensorVarray &arg1, /// template -btasUMTensorVarray& add_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg) { +btasUMTensorVarray &add_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg) { detail::to_cuda(result); detail::to_cuda(arg); btas_tensor_add_to_cuda_impl(result, arg, T(1.0)); return result; } -template >> -btasUMTensorVarray& add_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg, const Scalar factor) { +template >> +btasUMTensorVarray &add_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg, + const Scalar factor) { add_to(result, arg); btas_tensor_scale_to_cuda_impl(result, factor); return result; @@ -416,7 +440,8 @@ btasUMTensorVarray mult(const btasUMTensorVarray &arg1, return btas_tensor_mult_cuda_impl(arg1, arg2); } -template >> +template >> btasUMTensorVarray mult(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Scalar factor) { @@ -425,7 +450,9 @@ btasUMTensorVarray mult(const btasUMTensorVarray &arg1, return result; } -template >> +template < + typename T, typename Range, typename Perm, + typename = std::enable_if_t>> btasUMTensorVarray mult(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, const Perm &perm) { @@ -438,11 +465,13 @@ btasUMTensorVarray mult(const btasUMTensorVarray &arg1, return permute(result, perm); } -template && TiledArray::detail::is_permutation_v>> +template < + typename T, typename Range, typename Scalar, typename Perm, + typename = std::enable_if_t 
&& + TiledArray::detail::is_permutation_v>> btasUMTensorVarray mult(const btasUMTensorVarray &arg1, const btasUMTensorVarray &arg2, - const Scalar factor, - const Perm &perm) { + const Scalar factor, const Perm &perm) { auto result = mult(arg1, arg2, factor); // wait to finish before switch stream @@ -456,17 +485,19 @@ btasUMTensorVarray mult(const btasUMTensorVarray &arg1, /// mult to /// template -btasUMTensorVarray& mult_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg) { +btasUMTensorVarray &mult_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg) { detail::to_cuda(result); detail::to_cuda(arg); btas_tensor_mult_to_cuda_impl(result, arg); return result; } -template >> -btasUMTensorVarray& mult_to(btasUMTensorVarray &result, - const btasUMTensorVarray &arg, const Scalar factor) { +template >> +btasUMTensorVarray &mult_to(btasUMTensorVarray &result, + const btasUMTensorVarray &arg, + const Scalar factor) { mult_to(result, arg); btas_tensor_scale_to_cuda_impl(result, factor); return result; diff --git a/src/TiledArray/external/eigen.h b/src/TiledArray/external/eigen.h index 6ee0eaea3f..cd2c50b522 100644 --- a/src/TiledArray/external/eigen.h +++ b/src/TiledArray/external/eigen.h @@ -46,7 +46,14 @@ TILEDARRAY_PRAGMA_GCC(system_header) #endif #include + +// disable warnings re: ignored attributes on template argument +// Eigen::PacketType::type +// {aka __vector(2) long long int} +TILEDARRAY_PRAGMA_GCC(diagnostic push) +TILEDARRAY_PRAGMA_GCC(diagnostic ignored "-Wignored-attributes") #include +TILEDARRAY_PRAGMA_GCC(diagnostic pop) #if defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_LAPACKE_STRICT) #if !EIGEN_VERSION_AT_LEAST(3, 3, 7) diff --git a/src/TiledArray/external/cutt.h b/src/TiledArray/external/librett.h similarity index 76% rename from src/TiledArray/external/cutt.h rename to src/TiledArray/external/librett.h index a2a31ec20d..46d116c45b 100644 --- a/src/TiledArray/external/cutt.h +++ b/src/TiledArray/external/librett.h @@ -21,8 +21,8 @@ * */ -#ifndef TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED -#define TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED +#ifndef TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED +#define TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED #include @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include @@ -77,38 +77,39 @@ inline void permutation_to_col_major(std::vector& perm) { * @param stream the CUDA stream this permutation will be submitted to */ template -void cutt_permute(T* inData, T* outData, const TiledArray::Range& range, - const TiledArray::Permutation& perm, cudaStream_t stream) { +void librett_permute(T* inData, T* outData, const TiledArray::Range& range, + const TiledArray::Permutation& perm, cudaStream_t stream) { auto extent = range.extent(); std::vector extent_int(extent.begin(), extent.end()); - // cuTT uses FROM notation + // LibreTT uses FROM notation auto perm_inv = perm.inv(); std::vector perm_int(perm_inv.begin(), perm_inv.end()); - // cuTT uses ColMajor + // LibreTT uses ColMajor TiledArray::extent_to_col_major(extent_int); TiledArray::permutation_to_col_major(perm_int); - cuttResult_t status; + // librettResult_t status; + librettResult status; - cuttHandle plan; - status = cuttPlan(&plan, range.rank(), extent_int.data(), perm_int.data(), - sizeof(T), stream); + librettHandle plan; + status = librettPlan(&plan, range.rank(), extent_int.data(), perm_int.data(), + sizeof(T), stream); - TA_ASSERT(status == CUTT_SUCCESS); + TA_ASSERT(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, inData, outData); + status = 
librettExecute(plan, inData, outData); - TA_ASSERT(status == CUTT_SUCCESS); + TA_ASSERT(status == LIBRETT_SUCCESS); - status = cuttDestroy(plan); + status = librettDestroy(plan); - TA_ASSERT(status == CUTT_SUCCESS); + TA_ASSERT(status == LIBRETT_SUCCESS); } } // namespace TiledArray #endif // TILEDARRAY_HAS_CUDA -#endif // TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED +#endif // TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED diff --git a/src/TiledArray/initialize.h b/src/TiledArray/initialize.h index c86fa1d151..324f772ccf 100644 --- a/src/TiledArray/initialize.h +++ b/src/TiledArray/initialize.h @@ -60,10 +60,34 @@ inline World& initialize(int& argc, char**& argv, const MPI_Comm& comm, /// @} +#ifndef TA_SCOPED_INITIALIZE +/// calling this will initialize TA and then finalize it when leaving this scope +#define TA_SCOPED_INITIALIZE(args...) \ + TiledArray::initialize(args); \ + auto finalizer = TiledArray::scoped_finalizer(); +#endif + /// Finalizes TiledArray (and MADWorld runtime, if it had not been initialized /// when TiledArray::initialize was called). void finalize(); +namespace detail { +struct Finalizer { + ~Finalizer() noexcept; +}; +} // namespace detail + +/// creates an object whose destruction upon leaving this scope will cause +/// TiledArray::finalize to be called +detail::Finalizer scoped_finalizer(); + +#ifndef TA_FINALIZE_AFTER_LEAVING_THIS_SCOPE +/// calling this will cause TiledArray::finalize() to be called (if needed) +/// upon leaving this scope +#define TA_FINALIZE_AFTER_LEAVING_THIS_SCOPE() \ + auto finalizer = TiledArray::scoped_finalizer(); +#endif + void taskq_wait_busy(); void taskq_wait_yield(); void taskq_wait_usleep(int); diff --git a/src/TiledArray/tiledarray.cpp b/src/TiledArray/tiledarray.cpp index 088e4b9210..b4700ddec9 100644 --- a/src/TiledArray/tiledarray.cpp +++ b/src/TiledArray/tiledarray.cpp @@ -9,7 +9,7 @@ #ifdef TILEDARRAY_HAS_CUDA #include #include -#include +#include #endif #if TILEDARRAY_HAS_TTG @@ -29,16 +29,18 @@ inline void cuda_initialize() { cudaEnv::instance(); // cuBLASHandlePool::handle(); - // initialize cuTT - cuttInitialize(); + // initialize LibreTT + librettInitialize(); } /// finalize cuda environment inline void cuda_finalize() { CudaSafeCall(cudaDeviceSynchronize()); - cuttFinalize(); + librettFinalize(); cublasDestroy(cuBLASHandlePool::handle()); delete &cuBLASHandlePool::handle(); + // although TA::cudaEnv is a singleton, must explicitly delete it so + // that CUDA runtime is not finalized before the cudaEnv dtor is called cudaEnv::instance().reset(nullptr); } #endif @@ -173,6 +175,16 @@ void TiledArray::finalize() { finalized_accessor() = true; } +TiledArray::detail::Finalizer::~Finalizer() noexcept { + static std::mutex mtx; + std::scoped_lock lock(mtx); + if (TiledArray::initialized()) { + TiledArray::finalize(); + } +} + +TiledArray::detail::Finalizer TiledArray::scoped_finalizer() { return {}; } + void TiledArray::ta_abort() { SafeMPI::COMM_WORLD.Abort(); } void TiledArray::ta_abort(const std::string& m) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3335f9acb8..88ea115334 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -101,7 +101,7 @@ set(ta_test_src_files ta_test.cpp ) if(CUDA_FOUND) - list(APPEND ta_test_src_files cutt.cpp expressions_cuda_um.cpp tensor_um.cpp) + list(APPEND ta_test_src_files librett.cpp expressions_cuda_um.cpp tensor_um.cpp) endif() # if tiledarray library was compiled without exceptions, use TA header-only (see below) @@ -154,9 +154,11 @@ if(ENABLE_MPI) $ --log_level=unit_scope 
${${executable}_np_${p}_args} ${MPIEXEC_POSTFLAGS} ) + # N.B. some CUDA unit tests require TA_CUDA_NUM_STREAMS=1 for now set_tests_properties(tiledarray/unit/run-np-${p} PROPERTIES FIXTURES_REQUIRED TA_UNIT_TESTS_EXEC - ENVIRONMENT MAD_NUM_THREADS=2) + ENVIRONMENT "MAD_NUM_THREADS=2;TA_CUDA_NUM_STREAMS=1" + ) if (p GREATER 1) set_tests_properties(tiledarray/unit/run-np-${p} PROPERTIES ENVIRONMENT TA_UT_DISTRIBUTED=1) @@ -165,7 +167,9 @@ if(ENABLE_MPI) else() add_test(NAME tiledarray/unit/run-np-1 COMMAND ${executable}) + # N.B. some CUDA unit tests require TA_CUDA_NUM_STREAMS=1 for now set_tests_properties(tiledarray/unit/run-np-1 PROPERTIES FIXTURES_REQUIRED TA_UNIT_TESTS_EXEC - ENVIRONMENT MAD_NUM_THREADS=2) + ENVIRONMENT "MAD_NUM_THREADS=2;TA_CUDA_NUM_STREAMS=1" + ) endif() diff --git a/tests/expressions_cuda_um.cpp b/tests/expressions_cuda_um.cpp index b03ec0c994..a17b749789 100644 --- a/tests/expressions_cuda_um.cpp +++ b/tests/expressions_cuda_um.cpp @@ -123,7 +123,7 @@ struct UMExpressionsFixture : public TiledRangeFixture { TArrayUMD u; TArrayUMD v; TArrayUMD w; - double tolerance = 1.0e-14; + static constexpr double tolerance = 5.0e-14; }; // UMExpressionsFixture // Instantiate static variables for fixture @@ -305,7 +305,8 @@ BOOST_AUTO_TEST_CASE(permute) { BOOST_REQUIRE_NO_THROW(a("a,b,c") = b("c,b,a")); for (std::size_t i = 0ul; i < b.size(); ++i) { - const std::size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const std::size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (a.is_local(perm_index)) { TArrayUMD::value_type a_tile = a.find(perm_index).get(); TArrayUMD::value_type perm_b_tile = permute_fn(b.find(i), perm); @@ -333,7 +334,8 @@ BOOST_AUTO_TEST_CASE(permute) { BOOST_REQUIRE_NO_THROW(a("a,b,c") = b("b,c,a")); for (std::size_t i = 0ul; i < b.size(); ++i) { - const std::size_t perm_index = a.range().ordinal(perm2 * b.range().idx(i)); + const std::size_t perm_index = + a.tiles_range().ordinal(perm2 * b.tiles_range().idx(i)); if (a.is_local(perm_index)) { TArrayUMD::value_type a_tile = a.find(perm_index).get(); TArrayUMD::value_type perm_b_tile = permute_fn(b.find(i), perm2); @@ -350,7 +352,8 @@ BOOST_AUTO_TEST_CASE(scale_permute) { BOOST_REQUIRE_NO_THROW(a("a,b,c") = 2 * b("c,b,a")); for (std::size_t i = 0ul; i < b.size(); ++i) { - const std::size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const std::size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (a.is_local(perm_index)) { TArrayUMD::value_type a_tile = a.find(perm_index).get(); TArrayUMD::value_type perm_b_tile = permute_fn(b.find(i), perm); @@ -517,6 +520,30 @@ BOOST_AUTO_TEST_CASE(scal_block) { } } +BOOST_AUTO_TEST_CASE(scal_add_block) { + Permutation perm({2, 1, 0}); + BlockRange block_range(a.trange().tiles_range(), {3, 3, 3}, {5, 5, 5}); + + BOOST_REQUIRE_NO_THROW(c("a,b,c") = + 2 * (3 * a("a,b,c").block({3, 3, 3}, {5, 5, 5}) + + 4 * b("a,b,c").block({3, 3, 3}, {5, 5, 5}))); + + for (std::size_t index = 0ul; index < block_range.volume(); ++index) { + if (!a.is_zero(block_range.ordinal(index)) && + !b.is_zero(block_range.ordinal(index))) { + auto a_tile = a.find(block_range.ordinal(index)).get(); + auto b_tile = b.find(block_range.ordinal(index)).get(); + auto result_tile = c.find(index).get(); + + for (std::size_t j = 0ul; j < result_tile.range().volume(); ++j) { + BOOST_CHECK_EQUAL(result_tile[j], 2 * (3 * a_tile[j] + 4 * b_tile[j])); + } + } else { + BOOST_CHECK(c.is_zero(index)); + } + } +} + 
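N.B. the recurring change from c.range().ordinal(...) to c.tiles_range().ordinal(...) in these unit tests addresses tile indices: each loop runs over tile ordinals, permutes the corresponding tile coordinates, and maps them back to ordinals via the array's tile-index range. A minimal sketch of that idiom, using hypothetical names arr and perm that are not part of this patch:

    // Sketch only: mirrors the updated test loops; arr/perm are illustrative.
    #include <tiledarray.h>

    void visit_permuted_tiles(TiledArray::TArrayD& arr,
                              const TiledArray::Permutation& perm) {
      const auto& tiles = arr.tiles_range();  // range over tile indices
      for (std::size_t ord = 0ul; ord < arr.size(); ++ord) {
        // tile ordinal -> tile coordinate, apply the permutation, back to an ordinal
        const std::size_t perm_ord = tiles.ordinal(perm * tiles.idx(ord));
        if (arr.is_local(perm_ord) && !arr.is_zero(perm_ord)) {
          auto tile = arr.find(perm_ord).get();  // fetch the (possibly remote) tile
          (void)tile;  // the real tests compare tile contents element-wise here
        }
      }
    }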
BOOST_AUTO_TEST_CASE(permute_block) { Permutation perm({2, 1, 0}); BlockRange block_range(a.trange().tiles_range(), {3, 3, 3}, {5, 5, 5}); @@ -524,7 +551,8 @@ BOOST_AUTO_TEST_CASE(permute_block) { BOOST_REQUIRE_NO_THROW(c("a,b,c") = a("c,b,a").block({3, 3, 3}, {5, 5, 5})); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index))) { auto arg_tile = permute_fn(a.find(block_range.ordinal(perm_index)), perm); @@ -543,7 +571,8 @@ BOOST_AUTO_TEST_CASE(permute_block) { 2 * a("c,b,a").block({3, 3, 3}, {5, 5, 5})); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index))) { auto arg_tile = permute_fn(a.find(block_range.ordinal(perm_index)), perm); @@ -563,7 +592,8 @@ BOOST_AUTO_TEST_CASE(permute_block) { 4 * b("a,b,c").block({3, 3, 3}, {5, 5, 5}))); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index)) || !b.is_zero(block_range.ordinal(index))) { @@ -584,7 +614,8 @@ BOOST_AUTO_TEST_CASE(permute_block) { 4 * b("c,b,a").block({3, 3, 3}, {5, 5, 5}))); for (std::size_t index = 0ul; index < block_range.volume(); ++index) { - const size_t perm_index = c.range().ordinal(perm * c.range().idx(index)); + const size_t perm_index = + c.tiles_range().ordinal(perm * c.tiles_range().idx(index)); if (!a.is_zero(block_range.ordinal(perm_index)) || !b.is_zero(block_range.ordinal(perm_index))) { @@ -867,7 +898,8 @@ BOOST_AUTO_TEST_CASE(add_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -879,7 +911,8 @@ BOOST_AUTO_TEST_CASE(add_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -958,7 +991,8 @@ BOOST_AUTO_TEST_CASE(scale_add_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -970,7 +1004,8 @@ BOOST_AUTO_TEST_CASE(scale_add_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + 
c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -1058,7 +1093,8 @@ BOOST_AUTO_TEST_CASE(subt_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -1070,7 +1106,8 @@ BOOST_AUTO_TEST_CASE(subt_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -1133,7 +1170,8 @@ BOOST_AUTO_TEST_CASE(scale_subt_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -1145,7 +1183,8 @@ BOOST_AUTO_TEST_CASE(scale_subt_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -1233,7 +1272,8 @@ BOOST_AUTO_TEST_CASE(mult_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -1245,7 +1285,8 @@ BOOST_AUTO_TEST_CASE(mult_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -1308,7 +1349,8 @@ BOOST_AUTO_TEST_CASE(scale_mult_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = b.find(i).get(); @@ -1320,7 +1362,8 @@ BOOST_AUTO_TEST_CASE(scale_mult_permute) { for (std::size_t i = 0ul; i < c.size(); ++i) { TArrayUMD::value_type c_tile = c.find(i).get(); - const size_t perm_index = c.range().ordinal(perm * a.range().idx(i)); + const size_t perm_index = + 
c.tiles_range().ordinal(perm * a.tiles_range().idx(i)); TArrayUMD::value_type a_tile = permute_fn(a.find(perm_index), perm); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); @@ -2459,7 +2502,8 @@ BOOST_AUTO_TEST_CASE(dot_permute) { double expected = 0; for (std::size_t i = 0ul; i < a.size(); ++i) { TArrayUMD::value_type a_tile = a.find(i).get(); - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); TArrayUMD::value_type b_tile = permute_fn(b.find(perm_index), perm); for (std::size_t j = 0ul; j < a_tile.size(); ++j) @@ -2476,7 +2520,8 @@ BOOST_AUTO_TEST_CASE(dot_permute) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) && !b.is_zero(perm_index)) { auto a_tile = a.find(i).get(); auto b_tile = perm * b.find(perm_index).get(); @@ -2495,7 +2540,8 @@ BOOST_AUTO_TEST_CASE(dot_permute) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) && !b.is_zero(perm_index)) { auto a_tile = a.find(i).get(); auto b_tile = perm * b.find(perm_index).get(); @@ -2516,7 +2562,8 @@ BOOST_AUTO_TEST_CASE(dot_permute) { // Compute the expected value for the dot function. for (std::size_t i = 0ul; i < a.size(); ++i) { - const size_t perm_index = a.range().ordinal(perm * b.range().idx(i)); + const size_t perm_index = + a.tiles_range().ordinal(perm * b.tiles_range().idx(i)); if (!a.is_zero(i) && !b.is_zero(perm_index)) { auto a_tile = a.find(i).get(); auto b_tile = perm * b.find(perm_index).get(); diff --git a/tests/cutt.cpp b/tests/librett.cpp similarity index 81% rename from tests/cutt.cpp rename to tests/librett.cpp index 8a6b1af539..91c5b5b8ad 100644 --- a/tests/cutt.cpp +++ b/tests/librett.cpp @@ -27,8 +27,8 @@ #include #include "unit_test_config.h" -struct cuTTFixture { - // cuTTFixture() +struct LibreTTFixture { + // LibreTTFixture() // : A(100), // B(50), // C(20), @@ -36,16 +36,16 @@ struct cuTTFixture { // extent({100, 100}), // extent_nonsym({100, 50}), // perm({1, 0}) {} - cuTTFixture() : A(10), B(5), C(2) {} + LibreTTFixture() : A(10), B(5), C(2) {} int A; int B; int C; }; -BOOST_FIXTURE_TEST_SUITE(cutt_suite, cuTTFixture, TA_UT_LABEL_SERIAL); +BOOST_FIXTURE_TEST_SUITE(librett_suite, LibreTTFixture, TA_UT_LABEL_SERIAL); -BOOST_AUTO_TEST_CASE(cutt_gpu_mem) { +BOOST_AUTO_TEST_CASE(librett_gpu_mem) { int* a_host = (int*)std::malloc(A * A * sizeof(int)); int* b_host = (int*)std::malloc(A * A * sizeof(int)); int iter = 0; @@ -68,17 +68,18 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; - status = cuttPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_device, b_device); + status = librettExecute(plan, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); - 
cuttDestroy(plan); + BOOST_CHECK(status == LIBRETT_SUCCESS); + librettDestroy(plan); cudaMemcpy(b_host, b_device, A * A * sizeof(int), cudaMemcpyDeviceToHost); @@ -97,7 +98,7 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem) { cudaFree(b_device); } -BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym) { +BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym) { int* a_host = (int*)std::malloc(A * B * sizeof(int)); int* b_host = (int*)std::malloc(A * B * sizeof(int)); int iter = 0; @@ -115,8 +116,9 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym) { cudaMemcpy(a_device, a_host, A * B * sizeof(int), cudaMemcpyHostToDevice); - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent({B, A}); TiledArray::extent_to_col_major(extent); @@ -124,14 +126,14 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = cuttPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_device, b_device); + status = librettExecute(plan, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); - cuttDestroy(plan); + BOOST_CHECK(status == LIBRETT_SUCCESS); + librettDestroy(plan); cudaMemcpy(b_host, b_device, A * B * sizeof(int), cudaMemcpyDeviceToHost); @@ -150,7 +152,7 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym) { cudaFree(b_device); } -BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_column_major) { +BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_column_major) { int* a_host = (int*)std::malloc(A * B * C * sizeof(int)); int* b_host = (int*)std::malloc(A * B * C * sizeof(int)); int iter = 0; @@ -172,28 +174,29 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_column_major) { // b(j,i,k) = a(i,j,k) - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent3{int(A), int(B), int(C)}; std::vector perm3{1, 0, 2}; // std::vector perm3{0, 2, 1}; - status = cuttPlanMeasure(&plan, 3, extent3.data(), perm3.data(), sizeof(int), + status = librettPlanMeasure(&plan, 3, extent3.data(), perm3.data(), sizeof(int), 0, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_device, b_device); + status = librettExecute(plan, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); cudaMemcpy(b_host, b_device, A * B * C * sizeof(int), cudaMemcpyDeviceToHost); - status = cuttDestroy(plan); + status = librettDestroy(plan); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); iter = 0; for (std::size_t k = 0; k < C; k++) { @@ -212,7 +215,7 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_column_major) { cudaFree(b_device); } -BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_row_major) { +BOOST_AUTO_TEST_CASE(librett_gpu_mem_nonsym_rank_three_row_major) { int* a_host = (int*)std::malloc(A * B * C * sizeof(int)); int* b_host = (int*)std::malloc(A * B * C * sizeof(int)); int iter = 0; @@ -234,8 +237,9 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_row_major) { // b(j,i,k) = a(i,j,k) - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent({A, B, C}); TiledArray::extent_to_col_major(extent); @@ -243,20 +247,20 
@@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_row_major) { std::vector perm({1, 0, 2}); TiledArray::permutation_to_col_major(perm); - status = cuttPlanMeasure(&plan, 3, extent.data(), perm.data(), sizeof(int), 0, + status = librettPlanMeasure(&plan, 3, extent.data(), perm.data(), sizeof(int), 0, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_device, b_device); + status = librettExecute(plan, a_device, b_device); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); cudaMemcpy(b_host, b_device, A * B * C * sizeof(int), cudaMemcpyDeviceToHost); - status = cuttDestroy(plan); + status = librettDestroy(plan); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); iter = 0; for (std::size_t i = 0; i < A; i++) { @@ -275,7 +279,7 @@ BOOST_AUTO_TEST_CASE(cutt_gpu_mem_nonsym_rank_three_row_major) { cudaFree(b_device); } -BOOST_AUTO_TEST_CASE(cutt_unified_mem) { +BOOST_AUTO_TEST_CASE(librett_unified_mem) { int* a_um; cudaMallocManaged(&a_um, A * A * sizeof(int)); @@ -290,8 +294,9 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem) { } } - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent({A, A}); TiledArray::extent_to_col_major(extent); @@ -299,15 +304,15 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = cuttPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_um, b_um); + status = librettExecute(plan, a_um, b_um); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - cuttDestroy(plan); + librettDestroy(plan); cudaDeviceSynchronize(); @@ -323,7 +328,7 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem) { cudaFree(b_um); } -BOOST_AUTO_TEST_CASE(cutt_unified_mem_nonsym) { +BOOST_AUTO_TEST_CASE(librett_unified_mem_nonsym) { int* a_um; cudaMallocManaged(&a_um, A * B * sizeof(int)); @@ -338,8 +343,9 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_nonsym) { } } - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; std::vector extent({B, A}); TiledArray::extent_to_col_major(extent); @@ -347,15 +353,15 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_nonsym) { std::vector perm({1, 0}); TiledArray::permutation_to_col_major(perm); - status = cuttPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 2, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_um, b_um); + status = librettExecute(plan, a_um, b_um); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - cuttDestroy(plan); + librettDestroy(plan); cudaDeviceSynchronize(); iter = 0; @@ -369,7 +375,7 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_nonsym) { cudaFree(b_um); } -BOOST_AUTO_TEST_CASE(cutt_unified_mem_rank_three) { +BOOST_AUTO_TEST_CASE(librett_unified_mem_rank_three) { int* a_um; cudaMallocManaged(&a_um, A * B * C * sizeof(int)); @@ -386,8 +392,9 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_rank_three) { } } - cuttHandle plan; - cuttResult_t status; + librettHandle plan; + //librettResult_t status; + librettResult status; // b(k,i,j) = 
a(i,j,k) @@ -397,15 +404,15 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_rank_three) { std::vector perm({2, 0, 1}); TiledArray::permutation_to_col_major(perm); - status = cuttPlan(&plan, 3, extent.data(), perm.data(), sizeof(int), 0); + status = librettPlan(&plan, 3, extent.data(), perm.data(), sizeof(int), 0); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - status = cuttExecute(plan, a_um, b_um); + status = librettExecute(plan, a_um, b_um); - BOOST_CHECK(status == CUTT_SUCCESS); + BOOST_CHECK(status == LIBRETT_SUCCESS); - cuttDestroy(plan); + librettDestroy(plan); cudaDeviceSynchronize(); iter = 0; @@ -421,7 +428,7 @@ BOOST_AUTO_TEST_CASE(cutt_unified_mem_rank_three) { cudaFree(b_um); } -BOOST_AUTO_TEST_CASE(cutt_um_tensor) { +BOOST_AUTO_TEST_CASE(librett_um_tensor) { TiledArray::Range range{A, A}; using Tile = TiledArray::btasUMTensorVarray; @@ -453,7 +460,7 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor) { } } -BOOST_AUTO_TEST_CASE(cutt_um_tensor_nonsym) { +BOOST_AUTO_TEST_CASE(librett_um_tensor_nonsym) { TiledArray::Range range{B, A}; using Tile = TiledArray::btasUMTensorVarray; @@ -485,7 +492,7 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor_nonsym) { } } -BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_three) { +BOOST_AUTO_TEST_CASE(librett_um_tensor_rank_three) { TiledArray::Range range{A, B, C}; using Tile = TiledArray::btasUMTensorVarray; @@ -540,7 +547,7 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_three) { } } -BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_four) { +BOOST_AUTO_TEST_CASE(librett_um_tensor_rank_four) { std::size_t a = 2; std::size_t b = 3; std::size_t c = 6; @@ -609,7 +616,7 @@ BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_four) { } } -BOOST_AUTO_TEST_CASE(cutt_um_tensor_rank_six) { +BOOST_AUTO_TEST_CASE(librett_um_tensor_rank_six) { std::size_t a = 2; std::size_t b = 3; std::size_t c = 6; diff --git a/tests/tensor_um.cpp b/tests/tensor_um.cpp index 310e04234f..33efbfd7d4 100644 --- a/tests/tensor_um.cpp +++ b/tests/tensor_um.cpp @@ -87,8 +87,7 @@ struct TensorUMFixture { const TensorUMFixture::range_type TensorUMFixture::r = make_range(81); -BOOST_FIXTURE_TEST_SUITE(tensor_um_suite, TensorUMFixture, - TA_UT_LABEL_SERIAL) +BOOST_FIXTURE_TEST_SUITE(tensor_um_suite, TensorUMFixture, TA_UT_LABEL_SERIAL) BOOST_AUTO_TEST_CASE(default_constructor) { // check constructor @@ -98,7 +97,6 @@ BOOST_AUTO_TEST_CASE(default_constructor) { BOOST_CHECK(x.empty()); // Check that range data is correct - BOOST_CHECK_EQUAL(x.data(), static_cast(NULL)); BOOST_CHECK_EQUAL(x.size(), 0ul); BOOST_CHECK_EQUAL(x.range().volume(), 0ul); diff --git a/tests/tot_dist_array_part2.cpp b/tests/tot_dist_array_part2.cpp index fa13819fee..b916812884 100644 --- a/tests/tot_dist_array_part2.cpp +++ b/tests/tot_dist_array_part2.cpp @@ -255,7 +255,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(trange, TestParam, test_params) { } } -BOOST_AUTO_TEST_CASE_TEMPLATE(range, TestParam, test_params) { +BOOST_AUTO_TEST_CASE_TEMPLATE(tiles_range, TestParam, test_params) { { tensor_type t; if (m_world.nproc() == 1)