Skip to content

Commit

Permalink
Test with CUDA-aware MPI in CI
Browse files Browse the repository at this point in the history
  • Loading branch information
masterleinad committed Dec 29, 2022
2 parents 7f1711d + eab3bca commit 10e4a84
Show file tree
Hide file tree
Showing 147 changed files with 3,097 additions and 1,304 deletions.
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Jenkinsfile export-ignore
/tests/* export-ignore
/tests/CMakeLists.txt -export-ignore
/tests/quick_tests -export-ignore
/tests/run_test.cmake -export-ignore
/tests/run_*.cmake -export-ignore
/tests/tests.h -export-ignore

#
Expand Down
17 changes: 1 addition & 16 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ jobs:
libraries: container iostreams python serialization system thread
platform: x64
configuration: Release
- name: Install mold linker
uses: rui314/setup-mold@v1
with:
make-default: false
- name: configure deal.II
run: |
mkdir build
Expand Down Expand Up @@ -89,10 +85,6 @@ jobs:
petsc-dev \
libmetis-dev \
libhdf5-mpi-dev
- name: Install mold linker
uses: rui314/setup-mold@v1
with:
make-default: false
- name: info
run: |
mpicc -v
Expand Down Expand Up @@ -168,10 +160,6 @@ jobs:
numdiff \
openmpi-bin \
libboost-all-dev
- name: Install mold linker
uses: rui314/setup-mold@v1
with:
make-default: false
- name: info
run: |
mpicc -v
Expand Down Expand Up @@ -206,6 +194,7 @@ jobs:
-D DEAL_II_WITH_KOKKOS="ON" \
-D KOKKOS_DIR=${GITHUB_WORKSPACE}/../kokkos-install \
-D DEAL_II_WITH_MPI="ON" \
-D DEAL_II_MPI_WITH_DEVICE_SUPPORT="ON" \
-D DEAL_II_WITH_P4EST="ON" \
-D DEAL_II_COMPONENT_EXAMPLES="ON" \
..
Expand Down Expand Up @@ -266,10 +255,6 @@ jobs:
intel-oneapi-mkl-devel \
intel-oneapi-tbb-devel
sudo apt-get clean
- name: Install mold linker
uses: rui314/setup-mold@v1
with:
make-default: false
- name: info
run: |
source /opt/intel/oneapi/setvars.sh
Expand Down
8 changes: 0 additions & 8 deletions .github/workflows/osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ jobs:
run: |
g++ -v
cmake --version
- name: Install mold linker
uses: rui314/setup-mold@v1
with:
make-default: false
- name: configure
run: |
cmake -D CMAKE_BUILD_TYPE=Debug -D DEAL_II_CXX_FLAGS='-Werror' -D DEAL_II_EARLY_DEPRECATIONS=ON .
Expand Down Expand Up @@ -68,10 +64,6 @@ jobs:
#export OMPI_CXX=g++
#export OMPI_CC=gcc
#export OMPI_FC=gfortran
- name: Install mold linker
uses: rui314/setup-mold@v1
with:
make-default: false
- name: info
run: |
mpicxx -v
Expand Down
16 changes: 12 additions & 4 deletions cmake/macros/macro_deal_ii_add_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -571,11 +571,19 @@ function(deal_ii_add_test _category _test_name _comparison_file)
set_tests_properties(${_test_full} PROPERTIES PROCESSORS ${_slots})
endif()

if(NOT "${_n_cpu}${_n_threads}" STREQUAL "00")
#
# Serialize all tests that share a common executable target. This
# involves tests with .threads=N. and .mpirun=N. annotation, as well
# as tests with parameter files (that might share a common executable
# target).
#
if( NOT "${_n_cpu}${_n_threads}" STREQUAL "00" OR
"${_source_file}" MATCHES "(prm|json)$" )
#
# Running multiple variants in parallel triggers a race condition
# where the same (not yet existent) executable is built
# concurrently leading to undefined outcomes.
# Running multiple variants of tests with the same target
# executable in parallel triggers a race condition where the same
# (not yet existent) target is built concurrently leading to
# undefined outcomes.
#
# Luckily CMake has a mechanism to force a test to be run after
# another has finished (and both are scheduled):
Expand Down
1 change: 1 addition & 0 deletions contrib/ci/Jenkinsfile.mpi
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ pipeline
$WORKSPACE/
time ninja -j 10 # 12 gives OOM
time ninja test # quicktests
time ctest --rerun-failed --output-on-failure -R quick_tests/
time ninja setup_tests
time ctest -R "all-headers|multigrid/transfer|matrix_free/matrix_" --output-on-failure -j $NP --no-compress-output -T test
'''
Expand Down
139 changes: 139 additions & 0 deletions contrib/ci/Jenkinsfile.serial
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!groovy

/*
This Jenkins job compiles and tests deal.II with MPI disabled (serial build)
See https://ci.tjhei.info/job/dealii-serial/ for details.
*/

/*
Settings to apply inside Jenkins:
- discover pull requests (remove branches/master)
- Strategy: merged PR
- enable "Disable GitHub Multibranch Status Plugin"
- trigger build on pull request comment: .* /rebuild.* (without space!)
- Jenkinsfile: choose contrib/ci/Jenkinsfile.serial
- scan: every 4 hours
- discard: 5+ items
*/

// load library https://github.com/tjhei/jenkins-stuff to provide
// killold.killOldBuilds() function:
@Library('tjhei') _

// Declarative pipeline for the "serial" CI job. It consists of an
// "abort old" stage followed by a "main" stage that runs inside a Docker
// container and contains the nested "check" and "build" stages.
pipeline
{
// No pipeline-wide agent; every stage declares its own.
agent none

stages
{
// Reports an initial PENDING status to GitHub and cancels older builds.
stage("abort old")
{
agent none
steps
{
githubNotify context: 'Jenkins: serial', description: 'initializing...', status: 'PENDING'
// kill older builds in this PR:
script { killold.killOldBuilds() }
}
}

// Container stage: the nested stages below run inside the Docker image.
stage("main")
{
agent
{
docker
{
image 'tjhei/candi:v9.4.1-r1-ubuntu20.04'
}
}

// Wipe the workspace once this stage (and its other post steps) are done.
post { cleanup { cleanWs() } }

stages
{
// Gate: only proceed if the GitHub issue/PR carries the 'ready to test'
// label. The stage is skipped on 'master' and on branches matching
// 'dealii-*'.
stage("check")
{
when {
not {
anyOf {
branch 'master'
branch pattern: "dealii-*", comparator: "GLOB"
}
}
}

steps
{
githubNotify context: 'Jenkins: serial', description: 'pending...', status: 'PENDING'
// Query the labels of ${CHANGE_ID} via the GitHub API; exit with an
// error (failing this stage) if 'ready to test' is not among them.
sh '''
wget -q -O - https://api.github.com/repos/dealii/dealii/issues/${CHANGE_ID}/labels | grep 'ready to test' || \
{ echo "This commit will only be tested when it has the label 'ready to test'. Trigger a rebuild by adding a comment that contains '/rebuild'..."; exit 1; }
'''
}
post
{
failure
{
// A missing label is reported as PENDING (not FAILURE) on GitHub and
// the build result is set to NOT_BUILT.
githubNotify context: 'Jenkins: serial', description: 'need ready to test label and /rebuild', status: 'PENDING'
script
{
currentBuild.result='NOT_BUILT'
}
}
}
}

// Configure, compile and test deal.II without MPI.
stage('build')
{
steps
{
// Abort the whole build step if it takes longer than five hours.
timeout(time: 5, unit: 'HOURS')
{
sh "echo \"building on node ${env.NODE_NAME}\""
// Out-of-source Ninja build in /home/dealii/build: Debug mode with
// -Werror, MPI disabled, unity build enabled. Afterwards the quick
// tests are run, the test suite is set up, and ctest is run with $NP
// parallel jobs, writing XML results (-T test).
sh '''#!/bin/bash
set -e
set -x
export TEST_TIME_LIMIT=1200
export NP=`grep -c ^processor /proc/cpuinfo`
mkdir -p /home/dealii/build
cd /home/dealii/build
cmake -G "Ninja" \
-D DEAL_II_CXX_FLAGS='-Werror' \
-D DEAL_II_CXX_FLAGS_DEBUG='-Og' \
-D DEAL_II_EARLY_DEPRECATIONS=ON \
-D CMAKE_BUILD_TYPE=Debug \
-D DEAL_II_WITH_MPI=OFF \
-D DEAL_II_UNITY_BUILD=ON \
$WORKSPACE/
time ninja -j 10 # 12 gives OOM
time ninja test # quicktests
time ninja setup_tests
time ctest --output-on-failure -j $NP --no-compress-output -T test
'''
// Only reached when the script above succeeded (it runs with 'set -e').
githubNotify context: 'Jenkins: serial', description: 'OK', status: 'SUCCESS'
}
}

post
{
always
{
// Copy the CTest XML results and the detailed log into the workspace
// so that they can be published/archived. '|| true' keeps a missing
// file from failing the copy step itself.
sh "cp /home/dealii/build/Testing/*/*.xml $WORKSPACE/.xml || true"
xunit tools: [CTest(pattern: '*.xml')]
sh "cp /home/dealii/build/detailed.log $WORKSPACE/detailed-serial.log || true"
archiveArtifacts artifacts: 'detailed-serial.log', fingerprint: true
}

failure
{
githubNotify context: 'Jenkins: serial', description: 'build failed', status: 'FAILURE'
}
}
}

}
}
}
}
12 changes: 8 additions & 4 deletions doc/news/changes/minor/20221129Maier
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Improved: The quick_tests mechanism has been redesigned. The tests are now
called <code>quick_tests/[tests].[build]</code> and can be invoked via
ctest as well.
Improved: The quick_tests mechanism has been redesigned. Quick tests are
now part of the regular deal.II testsuite. This means they can be
configured via the <code>setup_tests_quick_tests</code> target, and run via
invoking ctest from the build directory. The <code>test</code> target will
now ensure that the library is fully compiled and quick tests are
configured prior to running all quick tests.

<br>
(Matthias Maier, 2022/11/29)
(Matthias Maier, 2022/11/29, 2022/12/10)

5 changes: 5 additions & 0 deletions doc/news/changes/minor/20221213StefanoZampini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fixed: PETSc has no concept of exclusive ownership, only shared ownership. Vec and Mat objects are
reference counted, and automatically cleaned when no longer used. We do not need to keep track
of ownership manually.
<br>
(Stefano Zampini, 2022/12/13)
3 changes: 3 additions & 0 deletions doc/news/changes/minor/20221213StefanoZampini-b
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fixed: Add default MPI getter for PETSc Mat objects, that queries the underlying PETSc type.
<br>
(Stefano Zampini, 2022/12/13)
3 changes: 3 additions & 0 deletions doc/news/changes/minor/20221220Feder
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Improved: FEInterfaceValues objects can now also be constructed using hp-collections.
<br>
(Marco Feder, 2022/12/20)
4 changes: 4 additions & 0 deletions doc/news/changes/minor/20221220StefanoZampini
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fixed: Small fixes to communicator handling in PETSc classes. Move from
GetArray to GetArrayRead (threadsafe version).
<br>
(Stefano Zampini, 2022/12/20)
3 changes: 3 additions & 0 deletions doc/news/changes/minor/20221220StefanoZampini-b
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
New: PETScWrappers::BlockSparseMatrix now is also a PETSc MATNEST type.
<br>
(Stefano Zampini, 2022/12/20)
10 changes: 10 additions & 0 deletions doc/news/changes/minor/20221228Pelteret
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Improved: The public interface of FEInterfaceValues has been extended
further to support hp-FEM. Functions have been added to report if the class has
been initialized with hp support, and to get the underlying
hp::MappingCollection, hp::FECollection and hp::QCollection. The various
FEInterfaceValues::reinit() methods now accept indices to indicate which
quadrature rule, mapping (and, in some cases, finite elements) should be used to
compute values across the interface.
<br>
(Jean-Paul Pelteret, 2022/12/28)

2 changes: 1 addition & 1 deletion examples/step-18/doc/intro.dox
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ these equations can be simplified to
&=
(\mathbf{f}, \varphi)_{\Omega(t_{n-1})}
-(\sigma^{n-1},\varepsilon(\varphi))_{\Omega(t_{n-1})}
+(\mathbf{b}(\mathbf{x},t_n),t_{n-1}), \varphi)_{\Gamma_N}
+(\mathbf{b}(\mathbf{x},t_n), \varphi)_{\Gamma_N}
\\
&\qquad\qquad
\forall \varphi \in \{\mathbf{v}\in H^1(\Omega(t_{n-1}))^d: \mathbf{v}|_{\Gamma_D}=0\}.
Expand Down
2 changes: 1 addition & 1 deletion examples/step-22/doc/intro.dox
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ we largely follow the paper D. Silvester and A. Wathen:
"Fast iterative solution of stabilised Stokes systems part II. Using
general block preconditioners." (SIAM J. Numer. Anal., 31 (1994),
pp. 1352-1367), which is available online <a
href="http://siamdl.aip.org/getabs/servlet/GetabsServlet?prog=normal&id=SJNAAM000031000005001352000001&idtype=cvips&gifs=Yes" target="_top">here</a>.
href="https://doi.org/10.1137/0731070">here</a>.
Principally, the difference in the matrix at the heart of the Schur
complement has two consequences:

Expand Down
12 changes: 6 additions & 6 deletions include/deal.II/base/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,12 @@ class TensorFunction;
* @tparam dim The space dimension of the range space within which the domain
* $\Omega$ of the function lies. Consequently, the function will be
* evaluated at objects of type @p Point<dim>.
* @tparam RangeNumberType The scalar type of the vector space that is the range
* (or image) of this function. As discussed above, objects of the current
* type represent functions from ${\mathbb R}^\text{dim}$ to
* $S^{n_\text{components}}$ where $S$ is the underlying scalar type of
* the vector space. The type of $S$ is given by the @p RangeNumberType template
* argument.
* @tparam RangeNumberType The scalar type of the vector space that is
* the range (or image) of this function. As discussed above,
* objects of the current type represent functions from ${\mathbb
* R}^\text{dim}$ to $S^{n_\text{components}}$ where $S$ is the
* underlying scalar type of the vector space. The type of $S$ is
* given by the @p RangeNumberType template argument.
*
* @ingroup functions
*/
Expand Down
4 changes: 2 additions & 2 deletions include/deal.II/base/kokkos.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

DEAL_II_NAMESPACE_OPEN

namespace Impl
namespace internal
{
/**
* Records if Kokkos has been initialized by deal.II. The value stored is only
Expand All @@ -33,7 +33,7 @@ namespace Impl
*/
void
ensure_kokkos_initialized();
} // namespace Impl
} // namespace internal

DEAL_II_NAMESPACE_CLOSE

Expand Down
6 changes: 5 additions & 1 deletion include/deal.II/base/memory_space.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ namespace MemorySpace
};

/**
* Structure describing Default memory space.
* Structure describing the default memory space. If Kokkos was configured
* with a GPU backend, the default memory space is the one corresponding to
* that backend. Otherwise, the default memory space is the same as the
* Host memory space.
*/
struct Default
{
Expand All @@ -48,6 +51,7 @@ namespace MemorySpace
/**
* Structure describing CUDA memory space.
*/
// FIXME Only enable if CUDA is enabled in deal.II.
using CUDA = Default;

} // namespace MemorySpace
Expand Down

0 comments on commit 10e4a84

Please sign in to comment.